Search in sources :

Example 6 with IntIterator

use of it.unimi.dsi.fastutil.ints.IntIterator in project elki by elki-project.

the class GeneratorXMLSpec method writeClusters.

/**
 * Write the resulting clusters to an output stream.
 *
 * The output starts with a header giving the number of clusters. Each cluster
 * (model) then gets its own commented header (size and, for
 * {@link GeneratorSingleCluster} models, the generator parameters), followed
 * by one line per object holding all non-model columns separated by a single
 * space.
 *
 * @param outStream output stream
 * @param data Generated data
 * @throws IOException thrown on write errors
 * @throws AbortException if the bundle contains no model column
 */
public void writeClusters(OutputStreamWriter outStream, MultipleObjectsBundle data) throws IOException {
    final String rule = "########################################################";
    // Find the first column whose type is compatible with Model.
    int modelcol = -1;
    for (int i = 0; i < data.metaLength(); i++) {
        if (Model.TYPE.isAssignableFromType(data.meta(i))) {
            modelcol = i;
            break;
        }
    }
    if (modelcol < 0) {
        throw new AbortException("No model column found in bundle.");
    }
    // Group the object indexes by model. The list keeps the models in order
    // of first appearance, so the output order does not depend on HashMap
    // iteration order.
    ArrayList<Model> models = new ArrayList<>();
    Map<Model, IntArrayList> modelMap = new HashMap<>();
    for (int i = 0; i < data.dataLength(); i++) {
        Model model = (Model) data.data(i, modelcol);
        IntArrayList modelids = modelMap.get(model);
        if (modelids == null) {
            models.add(model);
            modelids = new IntArrayList();
            modelMap.put(model, modelids);
        }
        modelids.add(i);
    }
    // Compute global discard values: total retained objects, and total
    // discarded objects as reported by the single-cluster generators.
    int totalsize = 0, totaldisc = 0;
    for (Entry<Model, IntArrayList> ent : modelMap.entrySet()) {
        totalsize += ent.getValue().size();
        if (ent.getKey() instanceof GeneratorSingleCluster) {
            totaldisc += ((GeneratorSingleCluster) ent.getKey()).getDiscarded();
        }
    }
    // Only used inside the per-cluster loop below, which is not entered when
    // the bundle is empty (totalsize == 0).
    double globdens = (double) (totalsize + totaldisc) / totalsize;
    outStream.append(rule).append(LINE_SEPARATOR);
    outStream.append("## Number of clusters: " + models.size()).append(LINE_SEPARATOR);
    for (Model model : models) {
        IntArrayList ids = modelMap.get(model);
        outStream.append(rule).append(LINE_SEPARATOR);
        outStream.append("## Size: " + ids.size()).append(LINE_SEPARATOR);
        if (model instanceof GeneratorSingleCluster) {
            GeneratorSingleCluster cursclus = (GeneratorSingleCluster) model;
            outStream.append(rule).append(LINE_SEPARATOR);
            outStream.append("## Cluster: ").append(cursclus.getName()).append(LINE_SEPARATOR);
            double[] cmin = cursclus.getClipmin();
            double[] cmax = cursclus.getClipmax();
            // Clipping bounds are only reported when both are present.
            if (cmin != null && cmax != null) {
                outStream.append("## Clipping: ").append(FormatUtil.format(cmin)).append(" - ").append(FormatUtil.format(cmax)).append(LINE_SEPARATOR);
            }
            outStream.append("## Density correction factor: " + cursclus.getDensityCorrection()).append(LINE_SEPARATOR);
            outStream.append("## Generators:").append(LINE_SEPARATOR);
            for (int i = 0; i < cursclus.getDim(); i++) {
                Distribution gen = cursclus.getDistribution(i);
                outStream.append("##   ").append(gen.toString()).append(LINE_SEPARATOR);
            }
            if (cursclus.getTransformation() != null && cursclus.getTransformation().getTransformation() != null) {
                outStream.append("## Affine transformation matrix:").append(LINE_SEPARATOR);
                outStream.append(FormatUtil.format(cursclus.getTransformation().getTransformation(), "## ")).append(LINE_SEPARATOR);
            }
            outStream.append("## Discards: " + cursclus.getDiscarded() + " Retries left: " + cursclus.getRetries()).append(LINE_SEPARATOR);
            // Ratio of generated to retained objects for this cluster,
            // normalized by the same ratio over the whole data set.
            double corf = (double) (cursclus.getSize() + cursclus.getDiscarded()) / cursclus.getSize() / globdens;
            outStream.append("## Density correction factor estimation: " + corf).append(LINE_SEPARATOR);
        }
        outStream.append(rule).append(LINE_SEPARATOR);
        for (IntIterator iter = ids.iterator(); iter.hasNext(); ) {
            int num = iter.nextInt();
            // Track whether a column was already written on this line, so the
            // separator is correct even when the model column is column 0
            // (testing "c > 0" would then produce a leading space).
            boolean first = true;
            for (int c = 0; c < data.metaLength(); c++) {
                if (c == modelcol) {
                    continue;
                }
                if (!first) {
                    outStream.append(' ');
                }
                first = false;
                outStream.append(data.data(num, c).toString());
            }
            outStream.append(LINE_SEPARATOR);
        }
    }
}
Also used : IntIterator(it.unimi.dsi.fastutil.ints.IntIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) GeneratorSingleCluster(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) Model(de.lmu.ifi.dbs.elki.data.model.Model) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 7 with IntIterator

use of it.unimi.dsi.fastutil.ints.IntIterator in project elki by elki-project.

the class LinearDiscriminantAnalysisFilter method computeCentroids.

/**
 * Build one centroid per class from the vectors assigned to that class.
 *
 * The returned list has the same order as {@code keys}: the centroid at
 * position {@code i} aggregates the vectors whose indexes are listed in
 * {@code classes} under {@code keys.get(i)}.
 *
 * @param dim Dimensionality
 * @param vectorcolumn Vector column
 * @param keys Key index
 * @param classes Classes
 * @return Centroids for each class.
 */
protected List<Centroid> computeCentroids(int dim, List<V> vectorcolumn, List<ClassLabel> keys, Map<ClassLabel, IntList> classes) {
    List<Centroid> result = new ArrayList<>(keys.size());
    for (ClassLabel label : keys) {
        Centroid centroid = new Centroid(dim);
        // Feed every member vector of this class into the centroid.
        IntIterator members = classes.get(label).iterator();
        while (members.hasNext()) {
            centroid.put(vectorcolumn.get(members.nextInt()));
        }
        result.add(centroid);
    }
    return result;
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) IntIterator(it.unimi.dsi.fastutil.ints.IntIterator) ArrayList(java.util.ArrayList)

Example 8 with IntIterator

use of it.unimi.dsi.fastutil.ints.IntIterator in project presto by prestodb.

the class GroupedTopNBuilder method processPage.

/**
 * Inserts the rows of a new page into the per-group row heaps and compacts
 * pages that have become sparsely referenced.
 *
 * The method registers the page in a page-reference slot, then walks the
 * positions starting at the first one that needs insertion. For each
 * position, the row is either added to its group's heap (heap not yet at
 * {@code topN}) or replaces the heap's current first row when it compares
 * lower. {@code memorySizeInBytes} is kept up to date throughout. Finally,
 * pages whose used-position count times {@code COMPACT_THRESHOLD} is below
 * their position count are either compacted or, when no position is used
 * any more, released.
 *
 * @param newPage page to process; must not be null (checked via checkArgument)
 * @param groupIds group ids for the positions of {@code newPage}; must not be null
 */
private void processPage(Page newPage, GroupByIdBlock groupIds) {
    checkArgument(newPage != null);
    checkArgument(groupIds != null);
    int firstPositionToInsert = findFirstPositionToInsert(newPage, groupIds);
    if (firstPositionToInsert < 0) {
        // no insertions required
        return;
    }
    // account for the new page before any row references it
    PageReference newPageReference = new PageReference(newPage);
    memorySizeInBytes += newPageReference.getEstimatedSizeInBytes();
    int newPageId;
    if (emptyPageReferenceSlots.isEmpty()) {
        // all the previous slots are full; create a new one
        pageReferences.ensureCapacity(currentPageCount + 1);
        newPageId = currentPageCount;
        currentPageCount++;
    } else {
        // reuse a previously removed page's slot
        newPageId = emptyPageReferenceSlots.dequeueInt();
    }
    verify(pageReferences.setIfNull(newPageId, newPageReference), "should not overwrite a non-empty slot");
    // ensure sufficient group capacity outside of the loop
    groupedRows.ensureCapacity(groupIds.getGroupCount());
    // update the affected heaps and record candidate pages that need compaction
    IntSet pagesToCompact = new IntOpenHashSet();
    for (int position = firstPositionToInsert; position < newPage.getPositionCount(); position++) {
        long groupId = groupIds.getGroupId(position);
        RowHeap rows = groupedRows.get(groupId);
        if (rows == null) {
            // a new group
            rows = new RowHeap(rowHeapComparator);
            groupedRows.set(groupId, rows);
        } else {
            // update an existing group;
            // remove the memory usage for this group for now; add it back after update
            memorySizeInBytes -= rows.getEstimatedSizeInBytes();
        }
        if (rows.size() < topN) {
            // heap not full yet: always keep the row
            Row row = new Row(newPageId, position);
            newPageReference.reference(row);
            rows.enqueue(row);
        } else {
            // may compare with the topN-th element within the heap to decide if update is necessary
            Row previousRow = rows.first();
            PageReference previousPageReference = pageReferences.get(previousRow.getPageId());
            if (pageWithPositionComparator.compareTo(newPage, position, previousPageReference.getPage(), previousRow.getPosition()) < 0) {
                // update reference and the heap
                rows.dequeue();
                previousPageReference.dereference(previousRow.getPosition());
                Row newRow = new Row(newPageId, position);
                newPageReference.reference(newRow);
                rows.enqueue(newRow);
                // compact a page if it is not the current input page and the reference count is below the threshold
                if (previousPageReference.getPage() != newPage && previousPageReference.getUsedPositionCount() * COMPACT_THRESHOLD < previousPageReference.getPage().getPositionCount()) {
                    pagesToCompact.add(previousRow.getPageId());
                }
            }
        }
        // add the (possibly changed) heap size back; for a new group this is the first time it is counted
        memorySizeInBytes += rows.getEstimatedSizeInBytes();
    }
    // may compact the new page as well; the loop above skips the current input page,
    // so the add is expected to report a new element
    if (newPageReference.getUsedPositionCount() * COMPACT_THRESHOLD < newPage.getPositionCount()) {
        verify(pagesToCompact.add(newPageId));
    }
    // compact pages
    IntIterator iterator = pagesToCompact.iterator();
    while (iterator.hasNext()) {
        int pageId = iterator.nextInt();
        PageReference pageReference = pageReferences.get(pageId);
        if (pageReference.getUsedPositionCount() == 0) {
            // no row references this page any more: free its slot for reuse and drop its memory
            pageReferences.set(pageId, null);
            emptyPageReferenceSlots.enqueue(pageId);
            memorySizeInBytes -= pageReference.getEstimatedSizeInBytes();
        } else {
            // replace the page's pre-compaction size with its post-compaction size
            memorySizeInBytes -= pageReference.getEstimatedSizeInBytes();
            pageReference.compact();
            memorySizeInBytes += pageReference.getEstimatedSizeInBytes();
        }
    }
}
Also used : IntOpenHashSet(it.unimi.dsi.fastutil.ints.IntOpenHashSet) IntIterator(it.unimi.dsi.fastutil.ints.IntIterator) IntSet(it.unimi.dsi.fastutil.ints.IntSet)

Example 9 with IntIterator

use of it.unimi.dsi.fastutil.ints.IntIterator in project symja_android_library by axkr.

the class TimeColumn method removeMissing.

/**
 * Returns a column built from {@code emptyCopy()} that holds every value of
 * this column for which {@code valueIsMissing} is false, in iteration order.
 */
@Override
public TimeColumn removeMissing() {
    final TimeColumn result = emptyCopy();
    for (IntIterator values = intIterator(); values.hasNext(); ) {
        final int value = values.nextInt();
        if (valueIsMissing(value)) {
            // skip missing entries
            continue;
        }
        result.appendInternal(value);
    }
    return result;
}
Also used : IntIterator(it.unimi.dsi.fastutil.ints.IntIterator)

Example 10 with IntIterator

use of it.unimi.dsi.fastutil.ints.IntIterator in project symja_android_library by axkr.

the class DateFilters method eval.

/**
 * Evaluates the given predicate against each raw int value of this column and
 * returns the positions at which it holds.
 *
 * This version operates on predicates that treat the given IntPredicate as
 * operating on a packed local time. This is much more efficient than using a
 * LocalTimePredicate, but requires that the developer understand the
 * semantics of packedLocalTimes.
 *
 * @param predicate test applied to each int value in iteration order
 * @return a selection containing the zero-based position of every matching value
 */
default Selection eval(IntPredicate predicate) {
    Selection matches = new BitmapBackedSelection();
    IntIterator values = intIterator();
    // row counts the values consumed so far, i.e. the position of the current value
    for (int row = 0; values.hasNext(); row++) {
        if (predicate.test(values.nextInt())) {
            matches.add(row);
        }
    }
    return matches;
}
Also used : IntIterator(it.unimi.dsi.fastutil.ints.IntIterator) BitmapBackedSelection(tech.tablesaw.selection.BitmapBackedSelection) Selection(tech.tablesaw.selection.Selection) BitmapBackedSelection(tech.tablesaw.selection.BitmapBackedSelection)

Aggregations

IntIterator (it.unimi.dsi.fastutil.ints.IntIterator)31 ArrayList (java.util.ArrayList)5 Test (org.junit.Test)4 IntOpenHashSet (it.unimi.dsi.fastutil.ints.IntOpenHashSet)3 Centroid (de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid)2 IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList)2 BitmapBackedSelection (tech.tablesaw.selection.BitmapBackedSelection)2 Selection (tech.tablesaw.selection.Selection)2 PartitionKey (com.tencent.angel.PartitionKey)1 MatrixMeta (com.tencent.angel.ml.matrix.MatrixMeta)1 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)1 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 Model (de.lmu.ifi.dbs.elki.data.model.Model)1 GeneratorSingleCluster (de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster)1 LeafEntry (de.lmu.ifi.dbs.elki.index.tree.LeafEntry)1 XTreeDirectoryEntry (de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.xtree.XTreeDirectoryEntry)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)1 CovarianceMatrix (de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)1 EigenvalueDecomposition (de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition)1