Search in sources :

Example 36 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class UncertainifyFilter method filter.

/**
 * Derives a column of uncertain objects from every number vector column of
 * the given bundle. Columns of any other type are copied to the result
 * unchanged; a number vector column is additionally copied when {@code keep}
 * is set.
 *
 * @param objects bundle to process
 * @return the input bundle itself when it holds no data, otherwise a newly
 *         built bundle
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int size = objects.dataLength();
    if (size == 0) {
        return objects;
    }
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        final SimpleTypeInformation<?> meta = objects.meta(col);
        @SuppressWarnings("unchecked")
        final List<Object> values = (List<Object>) objects.getColumn(col);
        if (TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
            // Vector column: the cast is guarded by the type check above.
            @SuppressWarnings("unchecked")
            final VectorFieldTypeInformation<NumberVector> vectorMeta = (VectorFieldTypeInformation<NumberVector>) meta;
            final int dim = vectorMeta.getDimensionality();
            if (keep) {
                // Retain the original vectors next to the derived column.
                result.appendColumn(meta, values);
            }
            final List<UO> derived = new ArrayList<>(values.size());
            FiniteProgress progress = null;
            if (LOG.isVerbose()) {
                progress = new FiniteProgress("Derive uncertain objects", size, LOG);
            }
            // One uncertain object is generated per source vector.
            for (int row = 0; row < size; row++) {
                final NumberVector source = (NumberVector) values.get(row);
                derived.add(generator.newFeatureVector(rand, source, ArrayLikeUtil.NUMBERVECTORADAPTER));
                LOG.incrementProcessed(progress);
            }
            LOG.ensureCompleted(progress);
            // The derived column reuses the dimensionality of its source column.
            result.appendColumn(new VectorFieldTypeInformation<UO>(generator.getFactory(), dim), derived);
        } else {
            // Not a number vector column: pass it through untouched.
            result.appendColumn(meta, values);
        }
    }
    return result;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) UncertainObject(de.lmu.ifi.dbs.elki.data.uncertain.UncertainObject) ArrayList(java.util.ArrayList) List(java.util.List)

Example 37 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class DisjointCrossValidation method nextPartitioning.

/**
 * Builds the training and test bundles for the current fold and then
 * advances to the next fold. Rows whose entry in {@code assignment} equals
 * the current fold go to the test bundle, all other rows to the training
 * bundle.
 *
 * @return the split for the current fold, or {@code null} once
 *         {@code fold >= nfold}
 */
@Override
public TrainingAndTestSet nextPartitioning() {
    if (fold >= nfold) {
        return null;
    }
    final int testSize = sizes[fold];
    final int trainSize = bundle.dataLength() - testSize;
    final MultipleObjectsBundle training = new MultipleObjectsBundle();
    final MultipleObjectsBundle test = new MultipleObjectsBundle();
    // Split one column at a time, so both bundles keep the column layout.
    final int columns = bundle.metaLength();
    for (int col = 0; col < columns; ++col) {
        final ArrayList<Object> trainColumn = new ArrayList<>(trainSize);
        final ArrayList<Object> testColumn = new ArrayList<>(testSize);
        for (int row = 0; row < bundle.dataLength(); ++row) {
            if (assignment[row] != fold) {
                trainColumn.add(bundle.data(row, col));
            } else {
                testColumn.add(bundle.data(row, col));
            }
        }
        training.appendColumn(bundle.meta(col), trainColumn);
        test.appendColumn(bundle.meta(col), testColumn);
    }
    ++fold;
    return new TrainingAndTestSet(training, test, labels);
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList)

Example 38 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class RandomizedCrossValidation method nextPartitioning.

/**
 * Draws a fresh random training/test assignment for the current fold and
 * then advances to the next fold. A row goes to the training bundle when
 * {@code rnd.nextInt(nfold) < nfold - 1}, otherwise to the test bundle.
 *
 * @return the split for the current fold, or {@code null} once
 *         {@code fold >= nfold}
 */
@Override
public TrainingAndTestSet nextPartitioning() {
    if (fold >= nfold) {
        return null;
    }
    final MultipleObjectsBundle training = new MultipleObjectsBundle();
    final MultipleObjectsBundle test = new MultipleObjectsBundle();
    final Random rnd = random.getRandom();
    final int rows = bundle.dataLength();
    // true = training, false = test; the counts are used to presize the lists.
    final boolean[] inTraining = new boolean[rows];
    int trainCount = 0;
    int testCount = 0;
    for (int row = 0; row < inTraining.length; ++row) {
        final boolean train = rnd.nextInt(nfold) < nfold - 1;
        inTraining[row] = train;
        if (train) {
            ++trainCount;
        } else {
            ++testCount;
        }
    }
    // Split one column at a time, so both bundles keep the column layout.
    final int columns = bundle.metaLength();
    for (int col = 0; col < columns; ++col) {
        final ArrayList<Object> trainColumn = new ArrayList<>(trainCount);
        final ArrayList<Object> testColumn = new ArrayList<>(testCount);
        for (int row = 0; row < rows; ++row) {
            if (inTraining[row]) {
                trainColumn.add(bundle.data(row, col));
            } else {
                testColumn.add(bundle.data(row, col));
            }
        }
        training.appendColumn(bundle.meta(col), trainColumn);
        test.appendColumn(bundle.meta(col), testColumn);
    }
    ++fold;
    return new TrainingAndTestSet(training, test, labels);
}
Also used : Random(java.util.Random) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList)

Example 39 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class GlobalPrincipalComponentAnalysisTransformTest method defaultParameters.

/**
 * Runs the transform with its default parameters and checks that the output
 * has zero mean and unit variance per column, and zero covariance between
 * different columns.
 */
@Test
public void defaultParameters() {
    final String path = UNITTEST + "transformation-test-1.csv";
    final GlobalPrincipalComponentAnalysisTransform<DoubleVector> transform = new ELKIBuilder<GlobalPrincipalComponentAnalysisTransform<DoubleVector>>(GlobalPrincipalComponentAnalysisTransform.class).build();
    final MultipleObjectsBundle data = readBundle(path, transform);
    final int dims = getFieldDimensionality(data, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    // Accumulate the full covariance matrix and per-column statistics.
    final CovarianceMatrix covariance = new CovarianceMatrix(dims);
    final MeanVariance[] stats = MeanVariance.newArray(dims);
    for (int r = 0; r < data.dataLength(); r++) {
        final DoubleVector vec = get(data, r, 0, DoubleVector.class);
        covariance.put(vec);
        for (int c = 0; c < dims; c++) {
            final double value = vec.doubleValue(c);
            // Only finite values enter the per-column statistics (NaN is skipped, too).
            if (!(value > Double.NEGATIVE_INFINITY) || !(value < Double.POSITIVE_INFINITY)) {
                continue;
            }
            stats[c].put(value);
        }
    }
    final double[][] population = covariance.destroyToPopulationMatrix();
    for (int c = 0; c < dims; c++) {
        for (int r = 0; r < dims; r++) {
            // Identity matrix expected: 1 on the diagonal, 0 elsewhere.
            final double expected = (c == r) ? 1. : 0.;
            assertEquals("Unexpected covariance", expected, population[r][c], 1e-15);
        }
        assertEquals("Mean not as expected", 0., stats[c].getMean(), 1e-15);
        assertEquals("Variance not as expected", 1., stats[c].getNaiveVariance(), 1e-15);
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 40 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class ClassLabelFilterTest method parameters.

/**
 * Test with parameter c as the column which is to be converted to a class
 * label.
 */
@Test
public void parameters() {
    final int labelIndex = 2;
    final String path = UNITTEST + "external-id-test-1.csv";
    final ClassLabelFilter labelFilter = new ELKIBuilder<>(ClassLabelFilter.class).with(ClassLabelFilter.Parameterizer.CLASS_LABEL_INDEX_ID, labelIndex).build();
    final MultipleObjectsBundle data = readBundle(path, labelFilter);
    // Expected layout of the filtered bundle:
    // column 0 - number vector field
    // column 1 - SimpleClassLabel
    // column 2 - LabelList
    // Column 0 must hold the vectors.
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(data.meta(0)));
    // Column 1 must hold SimpleClassLabel objects (checked on the first row).
    final Object classLabel = data.data(0, 1);
    assertEquals("Unexpected data type", SimpleClassLabel.class, classLabel.getClass());
    // The class label column must have one entry per data row.
    assertEquals("Unexpected data length", data.dataLength(), data.getColumn(1).size());
    // Column 2 must hold LabelList objects (checked on the first row).
    final Object remainingLabels = data.data(0, 2);
    assertEquals("Unexpected data type", LabelList.class, remainingLabels.getClass());
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4