Search in sources :

Example 11 with Int2ObjectOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project druid by druid-io.

the class ListFilteredDimensionSpecDimensionSelectorTest method createDictionaries.

/**
 * Builds a value-to-id dictionary and the matching id-to-value dictionary over all strings in {@code values}.
 * Ids come from a counter that starts at 0; the counter is only advanced from inside the mapping function
 * passed to {@code computeIntIfAbsent}.
 *
 * @param values rows of multi-value strings to index
 * @return a pair holding the value-to-id map first and the id-to-value map second
 */
private NonnullPair<Object2IntMap<String>, Int2ObjectMap<String>> createDictionaries(List<List<String>> values) {
    final Object2IntMap<String> valueToId = new Object2IntOpenHashMap<>();
    final Int2ObjectMap<String> idToValue = new Int2ObjectOpenHashMap<>();
    final MutableInt idCounter = new MutableInt(0);
    for (List<String> row : values) {
        for (String element : row) {
            final int id = valueToId.computeIntIfAbsent(element, unused -> idCounter.getAndIncrement());
            // Keep the reverse mapping in step with the forward one; an existing entry is left untouched.
            idToValue.putIfAbsent(id, element);
        }
    }
    return new NonnullPair<>(valueToId, idToValue);
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) NonnullPair(org.apache.druid.java.util.common.NonnullPair) MutableInt(org.apache.commons.lang3.mutable.MutableInt) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap)

Example 12 with Int2ObjectOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project druid by druid-io.

the class PartialSegmentMergeTask method runTask.

/**
 * Groups the configured partition locations by interval and bucket id, resolves one lock version per
 * interval, fetches the segment files, merges and pushes them, and reports the pushed segments to the
 * supervisor task.
 *
 * @param toolbox toolbox supplying the task action client, supervisor task client factory and persist dir
 * @return a success status carrying this task's id
 * @throws ISE if a lock is revoked or two locks cover the same interval
 * @throws Exception declared by the overridden method; helpers called here may propagate failures
 */
@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    // Bucket every partition location under its interval first, then under its bucket id.
    final Map<Interval, Int2ObjectMap<List<PartitionLocation>>> intervalToBuckets = new HashMap<>();
    for (PartitionLocation location : ioConfig.getPartitionLocations()) {
        final Int2ObjectMap<List<PartitionLocation>> bucketsOfInterval =
            intervalToBuckets.computeIfAbsent(location.getInterval(), k -> new Int2ObjectOpenHashMap<>());
        bucketsOfInterval.computeIfAbsent(location.getBucketId(), k -> new ArrayList<>()).add(location);
    }

    // Look up the locks through the supervisor task and record exactly one version per interval.
    final List<TaskLock> locks = toolbox.getTaskActionClient().submit(new SurrogateAction<>(supervisorTaskId, new LockListAction()));
    final Map<Interval, String> intervalToVersion = Maps.newHashMapWithExpectedSize(locks.size());
    for (TaskLock lock : locks) {
        if (lock.isRevoked()) {
            throw new ISE("Lock[%s] is revoked", lock);
        }
        final String previousVersion = intervalToVersion.put(lock.getInterval(), lock.getVersion());
        if (previousVersion != null) {
            throw new ISE("Unexpected state: Two versions([%s], [%s]) for the same interval[%s]", lock.getVersion(), previousVersion, lock.getInterval());
        }
    }

    // Fetch the segment files and log how long the fetch took.
    final Stopwatch fetchStopwatch = Stopwatch.createStarted();
    final Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles = fetchSegmentFiles(toolbox, intervalToBuckets);
    final long fetchSeconds = fetchStopwatch.elapsed(TimeUnit.SECONDS);
    fetchStopwatch.stop();
    LOG.info("Fetch took [%s] seconds", fetchSeconds);

    final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
        new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
        getId(),
        // always use a single http thread
        1,
        getTuningConfig().getChatHandlerTimeout(),
        getTuningConfig().getChatHandlerNumRetries()
    );

    // Start from an empty persist directory.
    final File persistDir = toolbox.getPersistDir();
    org.apache.commons.io.FileUtils.deleteQuietly(persistDir);
    FileUtils.mkdirp(persistDir);

    final Set<DataSegment> pushedSegments = mergeAndPushSegments(
        toolbox,
        getDataSchema(),
        getTuningConfig(),
        persistDir,
        intervalToVersion,
        intervalToUnzippedFiles
    );
    final PushedSegmentsReport report = new PushedSegmentsReport(getId(), Collections.emptySet(), pushedSegments, ImmutableMap.of());
    taskClient.report(supervisorTaskId, report);
    return TaskStatus.success(getId());
}
Also used : TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) LockListAction(org.apache.druid.indexing.common.actions.LockListAction) Logger(org.apache.druid.java.util.common.logger.Logger) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Arrays(java.util.Arrays) Stopwatch(com.google.common.base.Stopwatch) HashMap(java.util.HashMap) TaskResource(org.apache.druid.indexing.common.task.TaskResource) TaskStatus(org.apache.druid.indexer.TaskStatus) Pair(org.apache.druid.java.util.common.Pair) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Interval(org.joda.time.Interval) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) TaskLock(org.apache.druid.indexing.common.TaskLock) RetryUtils(org.apache.druid.java.util.common.RetryUtils) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) FileUtils(org.apache.druid.java.util.common.FileUtils) Nullable(javax.annotation.Nullable) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider) IndexMerger(org.apache.druid.segment.IndexMerger) Closer(org.apache.druid.java.util.common.io.Closer) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) 
SurrogateAction(org.apache.druid.indexing.common.actions.SurrogateAction) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) Preconditions(com.google.common.base.Preconditions) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections) LockListAction(org.apache.druid.indexing.common.actions.LockListAction) HashMap(java.util.HashMap) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ArrayList(java.util.ArrayList) Stopwatch(com.google.common.base.Stopwatch) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) DataSegment(org.apache.druid.timeline.DataSegment) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider) File(java.io.File) Interval(org.joda.time.Interval)

Example 13 with Int2ObjectOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project apex-malhar by apache.

the class DimensionalConfigurationSchema method buildDimensionsDescriptorIDAggregatorIDMaps.

/**
 * Rebuilds, per dimensions descriptor, the incremental and composite aggregator ID lists together with the
 * aggregator-ID to input/output {@link FieldsDescriptor} maps.
 * <p>
 * Precondition: every aggregator dependency (for example AVG depending on SUM and COUNT, or a composite
 * aggregator depending on its embedded aggregators) must already be resolved. This method does not
 * handle such dependencies.
 */
protected void buildDimensionsDescriptorIDAggregatorIDMaps() {
    dimensionsDescriptorIDToIncrementalAggregatorIDs = Lists.newArrayList();
    dimensionsDescriptorIDToAggregatorIDToInputAggregatorDescriptor = Lists.newArrayList();
    dimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor = Lists.newArrayList();

    // Pass 1: non-composite aggregators. Each descriptor gets a fresh ID list and fresh input/output maps.
    for (int ddID = 0; ddID < dimensionsDescriptorIDToAggregatorToAggregateDescriptor.size(); ddID++) {
        IntArrayList incrementalIDs = new IntArrayList();
        dimensionsDescriptorIDToIncrementalAggregatorIDs.add(incrementalIDs);

        Int2ObjectMap<FieldsDescriptor> idToInput = new Int2ObjectOpenHashMap<>();
        dimensionsDescriptorIDToAggregatorIDToInputAggregatorDescriptor.add(idToInput);

        Int2ObjectMap<FieldsDescriptor> idToOutput = new Int2ObjectOpenHashMap<>();
        dimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor.add(idToOutput);

        for (Map.Entry<String, FieldsDescriptor> aggregatorEntry : dimensionsDescriptorIDToAggregatorToAggregateDescriptor.get(ddID).entrySet()) {
            buildNonCompositeAggregatorIDMap(aggregatorEntry.getKey(), aggregatorEntry.getValue(), incrementalIDs, idToInput, idToOutput);
        }
    }

    // Composite aggregator IDs are allocated above the largest non-composite aggregator ID.
    int lastAssignedID = getLargestNonCompositeAggregatorID();

    // Pass 2: composite aggregators.
    dimensionsDescriptorIDToCompositeAggregatorIDs = Lists.newArrayList();
    for (int ddID = 0; ddID < dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor.size(); ddID++) {
        IntArrayList compositeIDs = new IntArrayList();
        // NOTE: the maps are shared with the incremental aggregators. Input and output descriptors are
        // looked up by aggregator ID, so sharing the same maps should be fine.
        Int2ObjectMap<FieldsDescriptor> idToInput = dimensionsDescriptorIDToAggregatorIDToInputAggregatorDescriptor.get(ddID);
        Int2ObjectMap<FieldsDescriptor> idToOutput = dimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor.get(ddID);
        dimensionsDescriptorIDToCompositeAggregatorIDs.add(compositeIDs);

        for (Map.Entry<String, FieldsDescriptor> compositeEntry : dimensionsDescriptorIDToCompositeAggregatorToAggregateDescriptor.get(ddID).entrySet()) {
            String name = compositeEntry.getKey();
            FieldsDescriptor inputFD = compositeEntry.getValue();
            AbstractCompositeAggregator aggregator = aggregatorRegistry.getNameToTopBottomAggregator().get(name);

            // Reuse the ID already registered under this name; otherwise take the next one and register it.
            Integer registeredID = aggregatorRegistry.getTopBottomAggregatorNameToID().get(name);
            final int aggregatorID;
            if (registeredID != null) {
                aggregatorID = registeredID;
            } else {
                aggregatorID = ++lastAssignedID;
                aggregatorRegistry.getTopBottomAggregatorNameToID().put(name, aggregatorID);
            }

            compositeIDs.add(aggregatorID);
            idToInput.put(aggregatorID, inputFD);
            idToOutput.put(aggregatorID, AggregatorUtils.getOutputFieldsDescriptor(inputFD, aggregator));
        }
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) AbstractCompositeAggregator(org.apache.apex.malhar.lib.dimensions.aggregator.AbstractCompositeAggregator) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) Map(java.util.Map) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap)

Example 14 with Int2ObjectOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project angel by Tencent.

the class SampleNeighbor method merge.

/**
 * Merges the node-id to neighbors maps of all partition results into a single result.
 *
 * @param partResults per-partition results; each element is cast to {@code PartSampleNeighborResult}
 * @return a {@code SampleNeighborResult} wrapping the combined map
 */
@Override
public GetResult merge(List<PartitionGetResult> partResults) {
    // First pass: add up the entry counts so the merged map can be created with that expected size.
    int expectedSize = 0;
    for (PartitionGetResult partResult : partResults) {
        PartSampleNeighborResult sampled = (PartSampleNeighborResult) partResult;
        expectedSize += sampled.getNodeIdToNeighbors().size();
    }

    // Second pass: copy every partition's entries into the merged map.
    Int2ObjectOpenHashMap<int[]> merged = new Int2ObjectOpenHashMap<>(expectedSize);
    for (PartitionGetResult partResult : partResults) {
        PartSampleNeighborResult sampled = (PartSampleNeighborResult) partResult;
        merged.putAll(sampled.getNodeIdToNeighbors());
    }
    return new SampleNeighborResult(merged);
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) PartitionGetResult(com.tencent.angel.ml.matrix.psf.get.base.PartitionGetResult)

Example 15 with Int2ObjectOpenHashMap

use of it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap in project angel by Tencent.

the class ComplexRowFormat method save.

/**
 * Writes every element of {@code row} to {@code output}, each paired with its column index shifted by the
 * partition's start column. Array-backed and map-backed storages are handled; any other storage type
 * results in nothing being written.
 *
 * @param row         row whose storage is written
 * @param saveContext not read by this method
 * @param partMeta    partition metadata supplying the start column offset
 * @param output      destination stream
 * @throws IOException declared for the per-element {@code save} calls
 */
private void save(ServerIntAnyRow row, PSMatrixSaveContext saveContext, MatrixPartitionMeta partMeta, DataOutputStream output) throws IOException {
    final IntElementStorage storage = row.getStorage();
    final long colOffset = partMeta.getStartCol();

    if (storage instanceof IntArrayElementStorage) {
        // Array-backed storage: the array position is the local column index.
        final IElement[] elements = ((IntArrayElementStorage) storage).getData();
        for (int col = 0; col < elements.length; col++) {
            save(col + colOffset, elements[col], output);
        }
        return;
    }

    if (storage instanceof IntElementMapStorage) {
        // Map-backed storage: the map key is the local column index.
        final Int2ObjectOpenHashMap<IElement> elementsByCol = ((IntElementMapStorage) storage).getData();
        for (ObjectIterator<Int2ObjectMap.Entry<IElement>> it = elementsByCol.int2ObjectEntrySet().fastIterator(); it.hasNext(); ) {
            final Int2ObjectMap.Entry<IElement> colAndElement = it.next();
            save(colAndElement.getIntKey() + colOffset, colAndElement.getValue(), output);
        }
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) IElement(com.tencent.angel.ps.storage.vector.element.IElement) Entry(java.util.Map.Entry) IntArrayElementStorage(com.tencent.angel.ps.storage.vector.storage.IntArrayElementStorage) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) IntElementMapStorage(com.tencent.angel.ps.storage.vector.storage.IntElementMapStorage) IntElementStorage(com.tencent.angel.ps.storage.vector.storage.IntElementStorage) ObjectIterator(it.unimi.dsi.fastutil.objects.ObjectIterator)

Aggregations

Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)29 Int2ObjectMap (it.unimi.dsi.fastutil.ints.Int2ObjectMap)8 List (java.util.List)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 HashSet (java.util.HashSet)6 Int2IntOpenHashMap (it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap)5 HashMap (java.util.HashMap)5 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)2 Preconditions (com.google.common.base.Preconditions)2 Stopwatch (com.google.common.base.Stopwatch)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 Maps (com.google.common.collect.Maps)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)2 ObjectIterator (it.unimi.dsi.fastutil.objects.ObjectIterator)2 LinkedHashMap (java.util.LinkedHashMap)2 LinkedHashSet (java.util.LinkedHashSet)2