Search in sources :

Example 1 with Int2DoubleOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap in project pinot by linkedin.

the class DoubleGroupByResultHolder method switchToMapMode.

/**
 * Moves the result storage from array mode over to map mode.
 * <p>
 * A new map and a new indexed priority queue are allocated, and every array slot with an
 * index in {@code [0, _resultHolderCapacity)} is copied into both of them.
 *
 * @param initialPriorityQueueSize Initial size of priority queue
 */
private void switchToMapMode(int initialPriorityQueueSize) {
    _storageMode = StorageMode.MAP_STORAGE;
    _resultMap = new Int2DoubleOpenHashMap(_resultHolderCapacity);
    _priorityQueue = new IntDoubleIndexedPriorityQueue(initialPriorityQueueSize, _minHeap);

    // Mirror each array slot into the map first, then into the priority queue.
    int key = 0;
    while (key < _resultHolderCapacity) {
        _resultMap.put(key, _resultArray[key]);
        _priorityQueue.put(key, _resultArray[key]);
        key++;
    }
}
Also used : IntDoubleIndexedPriorityQueue(com.linkedin.pinot.core.util.IntDoubleIndexedPriorityQueue) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap)

Example 2 with Int2DoubleOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap in project pinot by linkedin.

the class IntDoubleIndexedPriorityQueueTest method test.

/**
 * Shared test driver: fills the priority queue, overwrites randomly chosen keys and then
 * verifies that:
 * <ul>
 *   <li> Polling the priority queue yields the elements in the expected order. </li>
 *   <li> The number of polled elements matches the number of distinct keys. </li>
 * </ul>
 * A map that receives exactly the same puts serves as the reference for the expected content.
 *
 * @param minHeap Min or max mode
 */
public void test(boolean minHeap) {
    Random rng = new Random(0);
    IntDoubleIndexedPriorityQueue queue = new IntDoubleIndexedPriorityQueue(NUM_RECORDS, minHeap);
    Int2DoubleOpenHashMap reference = new Int2DoubleOpenHashMap(NUM_RECORDS);

    // Seed both structures with one random value per key.
    for (int key = 0; key < NUM_RECORDS; key++) {
        double seedValue = rng.nextDouble();
        queue.put(key, seedValue);
        reference.put(key, seedValue);
    }

    // Overwrite randomly chosen keys (the key is drawn before the value).
    for (int round = 0; round < NUM_RECORDS; round++) {
        int updatedKey = rng.nextInt(NUM_RECORDS);
        double updatedValue = rng.nextDouble();
        queue.put(updatedKey, updatedValue);
        reference.put(updatedKey, updatedValue);
    }

    // Copy the reference map into a list and sort it into the order the queue should produce.
    // A min heap corresponds to ascending order, hence descending == !minHeap.
    List<Pairs.IntDoublePair> sorted = new ArrayList<>(NUM_RECORDS);
    for (Int2DoubleMap.Entry entry : reference.int2DoubleEntrySet()) {
        sorted.add(new Pairs.IntDoublePair(entry.getKey(), entry.getValue()));
    }
    Collections.sort(sorted, new Pairs.IntDoubleComparator(!minHeap));

    // Drain the queue and compare each polled element with the sorted reference.
    int polled = 0;
    for (; !queue.isEmpty(); polled++) {
        Pairs.IntDoublePair actual = queue.poll();
        Pairs.IntDoublePair expected = sorted.get(polled);
        Assert.assertEquals(actual.getIntValue(), expected.getIntValue());
        Assert.assertEquals(actual.getDoubleValue(), expected.getDoubleValue());
    }

    // The queue must have held exactly as many elements as the reference map.
    Assert.assertEquals(polled, sorted.size());
}
Also used : Int2DoubleMap(it.unimi.dsi.fastutil.ints.Int2DoubleMap) ArrayList(java.util.ArrayList) Pairs(com.linkedin.pinot.common.utils.Pairs) Random(java.util.Random) IntDoubleIndexedPriorityQueue(com.linkedin.pinot.core.util.IntDoubleIndexedPriorityQueue) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap)

Example 3 with Int2DoubleOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap in project ignite by apache.

the class ColumnDecisionTreeTrainerBenchmark method loadVectorsIntoSparseDistributedMatrixCache.

/**
 * Load vectors into sparse distributed matrix.
 * <p>
 * Values are accumulated per feature index (feature index -> (sample index -> value)) and handed
 * to a data streamer in batches. The stream receiver merges every submitted batch into the map
 * already stored under the same key, creating a new map when none is stored yet.
 * <p>
 * Flushing is driven by an explicit count of not-yet-submitted samples, so every sample is
 * streamed exactly once, including the case where the number of vectors is an exact multiple
 * of the batch size.
 *
 * @param cacheName Name of cache where matrix is stored.
 * @param uuid UUID of matrix.
 * @param iter Iterator over vectors.
 * @param vectorSize size of vectors.
 */
private void loadVectorsIntoSparseDistributedMatrixCache(String cacheName, UUID uuid, Iterator<? extends org.apache.ignite.ml.math.Vector> iter, int vectorSize) {
    try (IgniteDataStreamer<SparseMatrixKey, Map<Integer, Double>> streamer = Ignition.localIgnite().dataStreamer(cacheName)) {
        int sampleIdx = 0;
        // Number of samples collected in 'batch' that have not been handed to the streamer yet.
        int pending = 0;
        streamer.allowOverwrite(true);
        // Merge the incoming partial column into whatever is already stored for the key.
        streamer.receiver(StreamTransformer.from((e, arg) -> {
            Map<Integer, Double> val = e.getValue();
            if (val == null) {
                val = new Int2DoubleOpenHashMap();
            }
            val.putAll((Map<Integer, Double>) arg[0]);
            e.setValue(val);
            return null;
        }));
        // Feature index -> (sample index -> value)
        Map<Integer, Map<Integer, Double>> batch = new HashMap<>();
        IntStream.range(0, vectorSize).forEach(i -> batch.put(i, new HashMap<>()));
        int batchSize = 1000;
        while (iter.hasNext()) {
            org.apache.ignite.ml.math.Vector next = iter.next();
            for (int i = 0; i < vectorSize; i++) {
                batch.get(i).put(sampleIdx, next.getX(i));
            }
            X.println("Sample index: " + sampleIdx);
            sampleIdx++;
            pending++;
            if (pending == batchSize) {
                batch.keySet().forEach(fi -> streamer.addData(new SparseMatrixKey(fi, uuid, fi), batch.get(fi)));
                // Start the next batch with new map instances, as the original code did,
                // rather than clearing the maps that were just passed to the streamer.
                IntStream.range(0, vectorSize).forEach(i -> batch.put(i, new HashMap<>()));
                pending = 0;
            }
        }
        // Submit the trailing partial batch, if any.
        if (pending > 0) {
            batch.keySet().forEach(fi -> streamer.addData(new SparseMatrixKey(fi, uuid, fi), batch.get(fi)));
        }
    }
}
Also used : CacheAtomicityMode(org.apache.ignite.cache.CacheAtomicityMode) Arrays(java.util.Arrays) FeaturesCache(org.apache.ignite.ml.trees.trainers.columnbased.caches.FeaturesCache) IgniteTestResources(org.apache.ignite.testframework.junits.IgniteTestResources) Random(java.util.Random) BiIndex(org.apache.ignite.ml.trees.trainers.columnbased.BiIndex) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) SparseDistributedMatrixStorage(org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage) VarianceSplitCalculator(org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.VarianceSplitCalculator) Vector(org.apache.ignite.ml.math.Vector) Estimators(org.apache.ignite.ml.estimators.Estimators) Map(java.util.Map) X(org.apache.ignite.internal.util.typedef.X) Level(org.apache.log4j.Level) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) MatrixColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.MatrixColumnDecisionTreeTrainerInput) LabeledVectorDouble(org.apache.ignite.ml.structures.LabeledVectorDouble) BaseDecisionTreeTest(org.apache.ignite.ml.trees.BaseDecisionTreeTest) IgniteTriFunction(org.apache.ignite.ml.math.functions.IgniteTriFunction) ProjectionsCache(org.apache.ignite.ml.trees.trainers.columnbased.caches.ProjectionsCache) UUID(java.util.UUID) StreamTransformer(org.apache.ignite.stream.StreamTransformer) Collectors(java.util.stream.Collectors) IgniteCache(org.apache.ignite.IgniteCache) ContextCache(org.apache.ignite.ml.trees.trainers.columnbased.caches.ContextCache) DoubleStream(java.util.stream.DoubleStream) IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) List(java.util.List) IgniteConfiguration(org.apache.ignite.configuration.IgniteConfiguration) Stream(java.util.stream.Stream) SparseMatrixKey(org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey) SplitCache(org.apache.ignite.ml.trees.trainers.columnbased.caches.SplitCache) 
RegionCalculators(org.apache.ignite.ml.trees.trainers.columnbased.regcalcs.RegionCalculators) IntStream(java.util.stream.IntStream) DecisionTreeModel(org.apache.ignite.ml.trees.models.DecisionTreeModel) IgniteFunction(org.apache.ignite.ml.math.functions.IgniteFunction) Model(org.apache.ignite.ml.Model) HashMap(java.util.HashMap) Function(java.util.function.Function) GiniSplitCalculator(org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.GiniSplitCalculator) BiIndexedCacheColumnDecisionTreeTrainerInput(org.apache.ignite.ml.trees.trainers.columnbased.BiIndexedCacheColumnDecisionTreeTrainerInput) CacheWriteSynchronizationMode(org.apache.ignite.cache.CacheWriteSynchronizationMode) IgniteUtils(org.apache.ignite.internal.util.IgniteUtils) MnistUtils(org.apache.ignite.ml.util.MnistUtils) LinkedList(java.util.LinkedList) Properties(java.util.Properties) Iterator(java.util.Iterator) ContinuousSplitCalculators(org.apache.ignite.ml.trees.trainers.columnbased.contsplitcalcs.ContinuousSplitCalculators) IOException(java.io.IOException) SplitDataGenerator(org.apache.ignite.ml.trees.SplitDataGenerator) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) Ignition(org.apache.ignite.Ignition) CacheConfiguration(org.apache.ignite.configuration.CacheConfiguration) IgniteDataStreamer(org.apache.ignite.IgniteDataStreamer) Tracer(org.apache.ignite.ml.math.Tracer) StorageConstants(org.apache.ignite.ml.math.StorageConstants) Assert(org.junit.Assert) Collections(java.util.Collections) ColumnDecisionTreeTrainer(org.apache.ignite.ml.trees.trainers.columnbased.ColumnDecisionTreeTrainer) GridCacheProcessor(org.apache.ignite.internal.processors.cache.GridCacheProcessor) InputStream(java.io.InputStream) CacheMode(org.apache.ignite.cache.CacheMode) HashMap(java.util.HashMap) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) Vector(org.apache.ignite.ml.math.Vector) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) 
SparseMatrixKey(org.apache.ignite.ml.math.distributed.keys.impl.SparseMatrixKey) Map(java.util.Map) HashMap(java.util.HashMap) Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap)

Example 4 with Int2DoubleOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap in project ignite by apache.

the class SparseDistributedMatrixStorage method matrixSet.

/**
 * Distributed matrix set.
 * <p>
 * The update runs as a compute job on the cluster group resolved for key {@code a}. The job
 * fetches the map stored under the cache key of {@code a} (creating one if none exists),
 * puts {@code v} under {@code b} or removes {@code b} when {@code v} is zero, and writes
 * the map back to the cache.
 *
 * @param a Row or column index.
 * @param b Row or column index.
 * @param v New value to set.
 */
private void matrixSet(int a, int b, double v) {
    // Remote set on the primary node (where given row or column is stored locally).
    ignite().compute(getClusterGroupForGivenKey(CACHE_NAME, a)).run(() -> {
        IgniteCache<RowColMatrixKey, Map<Integer, Double>> matrixCache = Ignition.localIgnite().getOrCreateCache(CACHE_NAME);

        // Try the local primary copy first.
        Map<Integer, Double> line = matrixCache.localPeek(getCacheKey(a), CachePeekMode.PRIMARY);
        if (line == null) {
            // Nothing found locally: fall back to a regular cache get.
            line = matrixCache.get(getCacheKey(a));
        }
        if (line == null) {
            // No entry stored yet: pick the map implementation according to the access mode.
            if (acsMode == SEQUENTIAL_ACCESS_MODE) {
                line = new Int2DoubleRBTreeMap();
            }
            else {
                line = new Int2DoubleOpenHashMap();
            }
        }

        if (v == 0.0) {
            // Zeros are not stored; drop the element if it is present.
            if (line.containsKey(b)) {
                line.remove(b);
            }
        }
        else {
            line.put(b, v);
        }

        // Write the updated map back.
        matrixCache.put(getCacheKey(a), line);
    });
}
Also used : Int2DoubleOpenHashMap(it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap) RowColMatrixKey(org.apache.ignite.ml.math.distributed.keys.RowColMatrixKey) Int2DoubleRBTreeMap(it.unimi.dsi.fastutil.ints.Int2DoubleRBTreeMap) Map(java.util.Map)

Aggregations

Int2DoubleOpenHashMap (it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap): 4, IntDoubleIndexedPriorityQueue (com.linkedin.pinot.core.util.IntDoubleIndexedPriorityQueue): 2, Map (java.util.Map): 2, Random (java.util.Random): 2, Pairs (com.linkedin.pinot.common.utils.Pairs): 1, Int2DoubleMap (it.unimi.dsi.fastutil.ints.Int2DoubleMap): 1, Int2DoubleRBTreeMap (it.unimi.dsi.fastutil.ints.Int2DoubleRBTreeMap): 1, IOException (java.io.IOException): 1, InputStream (java.io.InputStream): 1, ArrayList (java.util.ArrayList): 1, Arrays (java.util.Arrays): 1, Collections (java.util.Collections): 1, HashMap (java.util.HashMap): 1, Iterator (java.util.Iterator): 1, LinkedList (java.util.LinkedList): 1, List (java.util.List): 1, Properties (java.util.Properties): 1, UUID (java.util.UUID): 1, Function (java.util.function.Function): 1, Collectors (java.util.stream.Collectors): 1