use of it.unimi.dsi.fastutil.ints.IntIterator in project druid by druid-io.
the class StringDimensionMergerV9 method mergeBitmaps.
static void mergeBitmaps(
    List<IntBuffer> segmentRowNumConversions,
    Indexed<String> dimVals,
    BitmapFactory bmpFactory,
    RTree tree,
    boolean hasSpatial,
    IndexSeeker[] dictIdSeeker,
    int dictId,
    List<IndexableAdapter> adapters,
    String dimensionName,
    MutableBitmap nullRowsBitmap,
    GenericIndexedWriter<ImmutableBitmap> bitmapWriter
) throws IOException {
  List<ConvertingIndexedInts> convertedInvertedIndexesToMerge = Lists.newArrayListWithCapacity(adapters.size());
  for (int j = 0; j < adapters.size(); ++j) {
    int seekedDictId = dictIdSeeker[j].seek(dictId);
    if (seekedDictId != IndexSeeker.NOT_EXIST) {
      convertedInvertedIndexesToMerge.add(
          new ConvertingIndexedInts(adapters.get(j).getBitmapIndex(dimensionName, seekedDictId), segmentRowNumConversions.get(j))
      );
    }
  }
  MutableBitmap mergedIndexes = bmpFactory.makeEmptyMutableBitmap();
  List<IntIterator> convertedInvertedIndexesIterators = new ArrayList<>(convertedInvertedIndexesToMerge.size());
  for (ConvertingIndexedInts convertedInvertedIndexes : convertedInvertedIndexesToMerge) {
    convertedInvertedIndexesIterators.add(convertedInvertedIndexes.iterator());
  }
  // Merge the ascending index iterators into a single one, remove duplicates, and add to the mergedIndexes bitmap.
  // The merge is needed because some compacting MutableBitmap implementations are very inefficient when bits are
  // not added in ascending order.
  int prevRow = IndexMerger.INVALID_ROW;
  for (IntIterator mergeIt = IntIteratorUtils.mergeAscending(convertedInvertedIndexesIterators); mergeIt.hasNext(); ) {
    int row = mergeIt.nextInt();
    if (row != prevRow && row != IndexMerger.INVALID_ROW) {
      mergedIndexes.add(row);
    }
    prevRow = row;
  }
  if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) {
    mergedIndexes.or(nullRowsBitmap);
  }
  bitmapWriter.write(bmpFactory.makeImmutableBitmap(mergedIndexes));
  if (hasSpatial) {
    String dimVal = dimVals.get(dictId);
    if (dimVal != null) {
      List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
      float[] coords = new float[stringCoords.size()];
      for (int j = 0; j < coords.length; j++) {
        coords[j] = Float.valueOf(stringCoords.get(j));
      }
      tree.insert(coords, mergedIndexes);
    }
  }
}
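The dedup step above only works because IntIteratorUtils.mergeAscending yields rows in ascending order, so comparing each row against the previous one is enough to drop duplicates. A minimal standalone sketch of that pattern (not Druid code; the input iterator is simply assumed to be ascending) could look like this:

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterator;

public final class AscendingDedupSketch {
  // Collect the distinct values of an already-ascending IntIterator,
  // mirroring the prevRow check in mergeBitmaps above.
  static IntArrayList dedup(IntIterator ascending) {
    IntArrayList out = new IntArrayList();
    boolean first = true;
    int prev = 0;
    while (ascending.hasNext()) {
      int cur = ascending.nextInt();
      if (first || cur != prev) {
        out.add(cur);
      }
      prev = cur;
      first = false;
    }
    return out;
  }

  public static void main(String[] args) {
    IntIterator it = new IntArrayList(new int[] { 1, 1, 2, 3, 3, 3, 7 }).iterator();
    System.out.println(dedup(it)); // distinct values: 1, 2, 3, 7
  }
}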
use of it.unimi.dsi.fastutil.ints.IntIterator in project pinot by linkedin.
the class ObjectCustomSerDe method serializeIntOpenHashSet.
/**
 * Helper method to serialize an {@link IntOpenHashSet}.
 */
private static byte[] serializeIntOpenHashSet(IntOpenHashSet intOpenHashSet) throws IOException {
  ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream);
  // Write the size of the set.
  dataOutputStream.writeInt(intOpenHashSet.size());
  IntIterator intIterator = intOpenHashSet.iterator();
  while (intIterator.hasNext()) {
    dataOutputStream.writeInt(intIterator.nextInt());
  }
  return byteArrayOutputStream.toByteArray();
}
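Deserialization is the mirror image: consume the size prefix, then that many ints. A hypothetical counterpart (not Pinot's actual ObjectCustomSerDe code; the method name is made up here) might look like:

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

public final class IntSetSerDeSketch {
  // Hypothetical inverse of serializeIntOpenHashSet above: read the size
  // prefix, then add each serialized int back into a new set.
  static IntOpenHashSet deserializeIntOpenHashSet(byte[] bytes) throws IOException {
    DataInputStream dataInputStream = new DataInputStream(new ByteArrayInputStream(bytes));
    int size = dataInputStream.readInt();
    IntOpenHashSet intOpenHashSet = new IntOpenHashSet(size);
    for (int i = 0; i < size; i++) {
      intOpenHashSet.add(dataInputStream.readInt());
    }
    return intOpenHashSet;
  }
}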
use of it.unimi.dsi.fastutil.ints.IntIterator in project elki by elki-project.
the class XSplitter method minimumOverlapSplit.
/**
 * Perform a minimum overlap split. The
 * {@link #chooseMinimumOverlapSplit(int, int, int, boolean) minimum overlap
 * split} calculates the partition for the split dimension determined by
 * {@link #chooseSplitAxis(Iterable, int, int) chooseSplitAxis}
 * <code>(common split
 * history, minFanout, maxEntries - minFanout + 1)</code> with the minimum
 * overlap. This range may have been tested before (by the
 * {@link #topologicalSplit()}), but for the minimum overlap test we need to
 * test that anew. Note that this method returns <code>null</code> if the
 * minimum overlap split has a volume which is larger than the allowed
 * <code>maxOverlap</code> ratio or if the tree's minimum fanout is not larger
 * than the minimum directory size.
 *
 * @return distribution resulting from the minimum overlap split
 */
public SplitSorting minimumOverlapSplit() {
  if (node.getEntry(0) instanceof LeafEntry) {
    throw new IllegalArgumentException("The minimum overlap split will only be performed on directory nodes");
  }
  if (node.getNumEntries() < 2) {
    throw new IllegalArgumentException("Splitting less than two entries is pointless.");
  }
  int maxEntries = tree.getDirCapacity() - 1;
  int minFanout = tree.get_min_fanout();
  if (node.getNumEntries() < maxEntries) {
    throw new IllegalArgumentException("This entry list has not yet reached the maximum limit: " + node.getNumEntries() + "<=" + maxEntries);
  }
  assert !(node.getEntry(0) instanceof LeafEntry);
  if (minFanout >= tree.getDirMinimum()) {
    // minFanout not set for allowing underflowing nodes
    return null;
  }
  IntIterator dimensionListing;
  if (node.getEntry(0) instanceof XTreeDirectoryEntry) {
    // filter common split dimensions
    dimensionListing = getCommonSplitDimensions(node);
    if (!dimensionListing.hasNext()) {
      // no common dimensions
      return null;
    }
  } else {
    // test all dimensions
    dimensionListing = new IntegerRangeIterator(0, node.getEntry(0).getDimensionality());
  }
  int formerSplitAxis = this.splitAxis;
  // = maximum left-hand size
  maxEntries = maxEntries + 1 - minFanout;
  chooseSplitAxis(dimensionListing, minFanout, maxEntries);
  // find the best split point
  if (formerSplitAxis == this.splitAxis && tree.getDirMinimum() > minFanout) {
    // remember: this follows an unsuccessful topological split
    // avoid duplicate computations of {minEntries, ..., maxEntries}
    double minOverlap = pastOverlap;
    // test {minFanout, ..., minEntries - 1}
    SplitSorting ret1 = chooseMinimumOverlapSplit(this.splitAxis, minFanout, tree.getDirMinimum() - 1, false);
    if (ret1 != null && pastOverlap < minOverlap) {
      // this is a valid choice
      minOverlap = pastOverlap;
    }
    // test {maxEntries - minEntries + 2, ..., maxEntries - minFanout + 1}
    SplitSorting ret2 = chooseMinimumOverlapSplit(this.splitAxis, minFanout, tree.getDirMinimum() - 1, true);
    if (ret2 == null) {
      // accept first range regardless of whether or not there is one
      pastOverlap = minOverlap;
      return ret1;
    }
    if (pastOverlap < minOverlap) {
      // the second range is better
      return ret2;
    }
    // the first range is better
    pastOverlap = minOverlap;
    return ret1;
  } else {
    return chooseMinimumOverlapSplit(this.splitAxis, minFanout, maxEntries, false);
  }
}
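In the branch without common split dimensions, the method simply hands chooseSplitAxis an IntIterator over all dimensions 0..dimensionality-1. A minimal sketch of that enumeration (not ELKI code; it uses fastutil's IntIterators.fromTo in place of ELKI's IntegerRangeIterator, and an assumed dimensionality of 4) could be:

import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntIterators;

public final class SplitDimensionListingSketch {
  public static void main(String[] args) {
    int dimensionality = 4; // assumed value, purely for illustration
    // Enumerate candidate split dimensions 0..dimensionality-1 with an IntIterator,
    // as minimumOverlapSplit does with dimensionListing before calling chooseSplitAxis.
    IntIterator dimensionListing = IntIterators.fromTo(0, dimensionality);
    while (dimensionListing.hasNext()) {
      int d = dimensionListing.nextInt();
      System.out.println("evaluating split candidates on dimension " + d);
    }
  }
}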
use of it.unimi.dsi.fastutil.ints.IntIterator in project elki by elki-project.
the class RandomProjectedNeighborsAndDensities method computeSetsBounds.
/**
 * Create random projections, project the points, and put them into sets of size
 * about minSplitSize/2.
 *
 * @param points points to process
 * @param minSplitSize minimum size for which a point set is further
 *        partitioned (roughly corresponds to minPts in OPTICS)
 * @param ptList Points that are to be projected
 */
public void computeSetsBounds(Relation<V> points, int minSplitSize, DBIDs ptList) {
  this.minSplitSize = minSplitSize;
  final int size = points.size();
  final int dim = RelationUtil.dimensionality(points);
  this.points = points;
  // perform O(log N + log dim) splits of the entire point set's projections
  int nPointSetSplits = (int) (logOProjectionConst * MathUtil.log2(size * dim + 1));
  // perform O(log N + log dim) projections of the point set onto a random line
  int nProject1d = (int) (logOProjectionConst * MathUtil.log2(size * dim + 1));
  LOG.statistics(new LongStatistic(PREFIX + ".partition-size", nPointSetSplits));
  LOG.statistics(new LongStatistic(PREFIX + ".num-projections", nProject1d));
  splitsets = new ArrayList<>();
  // perform projections of points
  projectedPoints = new DoubleDataStore[nProject1d];
  DoubleDataStore[] tmpPro = new DoubleDataStore[nProject1d];
  Random rand = rnd.getSingleThreadedRandom();
  FiniteProgress projp = LOG.isVerbose() ? new FiniteProgress("Random projections", nProject1d, LOG) : null;
  for (int j = 0; j < nProject1d; j++) {
    // draw a random unit vector for this projection
    double[] currRp = new double[dim];
    double sum = 0;
    for (int i = 0; i < dim; i++) {
      double fl = rand.nextDouble() - 0.5;
      currRp[i] = fl;
      sum += fl * fl;
    }
    sum = FastMath.sqrt(sum);
    for (int i = 0; i < dim; i++) {
      currRp[i] /= sum;
    }
    WritableDoubleDataStore currPro = DataStoreUtil.makeDoubleStorage(ptList, DataStoreFactory.HINT_HOT);
    for (DBIDIter it = ptList.iter(); it.valid(); it.advance()) {
      NumberVector vecPt = points.get(it);
      // Dot product:
      double sum2 = 0;
      for (int i = 0; i < dim; i++) {
        sum2 += currRp[i] * vecPt.doubleValue(i);
      }
      currPro.put(it, sum2);
    }
    projectedPoints[j] = currPro;
    LOG.incrementProcessed(projp);
  }
  LOG.ensureCompleted(projp);
  // Log the number of scalar projections performed.
  long numprod = nProject1d * (long) ptList.size();
  LOG.statistics(new LongStatistic(PREFIX + ".num-scalar-products", numprod));
  // split entire point set, reuse projections by shuffling them
  IntArrayList proind = new IntArrayList(nProject1d);
  for (int j = 0; j < nProject1d; j++) {
    proind.add(j);
  }
  FiniteProgress splitp = LOG.isVerbose() ? new FiniteProgress("Splitting data", nPointSetSplits, LOG) : null;
  for (int avgP = 0; avgP < nPointSetSplits; avgP++) {
    // shuffle projections
    for (int i = 0; i < nProject1d; i++) {
      tmpPro[i] = projectedPoints[i];
    }
    // Shuffle axes (Fisher-Yates)
    for (int i = 1; i < nProject1d; i++) {
      final int j = rand.nextInt(i);
      // Swap i,j
      proind.set(i, proind.set(j, proind.getInt(i)));
    }
    IntIterator it = proind.iterator();
    int i = 0;
    while (it.hasNext()) {
      int cind = it.nextInt();
      projectedPoints[cind] = tmpPro[i];
      i++;
    }
    // split point set
    splitupNoSort(DBIDUtil.newArray(ptList), 0, size, 0, rand);
    LOG.incrementProcessed(splitp);
  }
  LOG.ensureCompleted(splitp);
}
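The shuffle above exploits the fact that IntList.set returns the value it displaces, so a single nested call swaps two positions, and the resulting permutation is then walked with a primitive IntIterator. A self-contained sketch of the same swap-and-iterate pattern (not ELKI code; plain java.util.Random and a fixed list size are assumed) might be:

import java.util.Random;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterator;

public final class ShuffledIndexSketch {
  public static void main(String[] args) {
    Random rand = new Random(42L); // fixed seed, purely for illustration
    IntArrayList proind = new IntArrayList();
    for (int j = 0; j < 5; j++) {
      proind.add(j);
    }
    // Fisher-Yates shuffle; IntList.set returns the displaced value, so a
    // single nested call swaps positions i and j.
    for (int i = proind.size() - 1; i > 0; i--) {
      int j = rand.nextInt(i + 1);
      proind.set(i, proind.set(j, proind.getInt(i)));
    }
    // Walk the permutation with a primitive iterator, no boxing involved.
    IntIterator it = proind.iterator();
    while (it.hasNext()) {
      System.out.println("use projection index " + it.nextInt());
    }
  }
}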
use of it.unimi.dsi.fastutil.ints.IntIterator in project elki by elki-project.
the class LinearDiscriminantAnalysisFilter method computeProjectionMatrix.
@Override
protected double[][] computeProjectionMatrix(List<V> vectorcolumn, List<? extends ClassLabel> classcolumn, int dim) {
  Map<ClassLabel, IntList> classes = partition(classcolumn);
  // Fix indexing of classes:
  List<ClassLabel> keys = new ArrayList<>(classes.keySet());
  // Compute centroids:
  List<Centroid> centroids = computeCentroids(dim, vectorcolumn, keys, classes);
  final double[][] sigmaB, sigmaI;
  // Between classes covariance:
  {
    CovarianceMatrix covmake = new CovarianceMatrix(dim);
    for (Centroid c : centroids) {
      covmake.put(c);
    }
    sigmaB = covmake.destroyToSampleMatrix();
  }
  {
    // (Average) within class variance:
    CovarianceMatrix covmake = new CovarianceMatrix(dim);
    int numc = keys.size();
    for (int i = 0; i < numc; i++) {
      double[] c = centroids.get(i).getArrayRef();
      // TODO: different weighting strategies? Sampling?
      for (IntIterator it = classes.get(keys.get(i)).iterator(); it.hasNext(); ) {
        covmake.put(minusEquals(vectorcolumn.get(it.nextInt()).toArray(), c));
      }
    }
    sigmaI = covmake.destroyToSampleMatrix();
    if (new LUDecomposition(sigmaI).det() == 0) {
      // Regularize a singular within-class covariance matrix.
      for (int i = 0; i < dim; i++) {
        sigmaI[i][i] += 1e-10;
      }
    }
  }
  double[][] sol = times(inverse(sigmaI), sigmaB);
  EigenvalueDecomposition decomp = new EigenvalueDecomposition(sol);
  SortedEigenPairs sorted = new SortedEigenPairs(decomp, false);
  return transpose(sorted.eigenVectors(tdim));
}
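The within-class loop iterates each class's row indices through a primitive IntIterator, avoiding boxing. A minimal sketch of building and consuming such per-class IntLists (hypothetical labels and helper names, not ELKI's partition method) could be:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;

public final class ClassPartitionSketch {
  // Group row indices by class label, keeping each group as a primitive IntList.
  static Map<String, IntList> partition(List<String> classcolumn) {
    Map<String, IntList> classes = new HashMap<>();
    for (int i = 0; i < classcolumn.size(); i++) {
      classes.computeIfAbsent(classcolumn.get(i), k -> new IntArrayList()).add(i);
    }
    return classes;
  }

  public static void main(String[] args) {
    List<String> labels = Arrays.asList("a", "b", "a", "a", "b");
    Map<String, IntList> classes = partition(labels);
    // Iterate one class's row indices without boxing, as in computeProjectionMatrix.
    for (IntIterator it = classes.get("a").iterator(); it.hasNext(); ) {
      System.out.println("row " + it.nextInt() + " has label a");
    }
  }
}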
Aggregations