Search in sources:

Example 1 with Int2ObjectLinkedOpenHashMap

Use of it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap in project pinot by linkedin.

From the class StarTreeDataTable, the method groupByIntColumnCount:

/**
   * Groups consecutive documents by the value of the given int column and returns
   * the doc-id range covered by each distinct value. Assumes docs in
   * [startDocId, endDocId) are sorted on the column, so equal values are adjacent.
   *
   * @param startDocId inclusive start document id
   * @param endDocId exclusive end document id
   * @param colIndex index of the column to group by
   * @return start,end for each value. inclusive start, exclusive end
   */
public Int2ObjectMap<IntPair> groupByIntColumnCount(int startDocId, int endDocId, Integer colIndex) {
    MMapBuffer mappedByteBuffer = null;
    try {
        int length = endDocId - startDocId;
        Int2ObjectMap<IntPair> rangeMap = new Int2ObjectLinkedOpenHashMap<>();
        final long startOffset = startDocId * (long) totalSizeInBytes;
        mappedByteBuffer = new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes, MMapMode.READ_WRITE);
        // Scan the (sorted) column; each time the value changes, record a
        // [start, end) doc-id range for the previous value. -1 is the "no
        // previous value" sentinel.
        int prevValue = -1;
        int prevStart = 0;
        for (int i = 0; i < length; i++) {
            int value = flipEndiannessIfNeeded(mappedByteBuffer.getInt((i * (long) totalSizeInBytes) + (colIndex * V1Constants.Numbers.INTEGER_SIZE)));
            if (prevValue != -1 && prevValue != value) {
                rangeMap.put(prevValue, new IntPair(startDocId + prevStart, startDocId + i));
                prevStart = i;
            }
            prevValue = value;
        }
        rangeMap.put(prevValue, new IntPair(startDocId + prevStart, endDocId));
        return rangeMap;
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (mappedByteBuffer != null) {
            try {
                mappedByteBuffer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return EMPTY_INT_OBJECT_MAP;
}
Also used : MMapBuffer(xerial.larray.mmap.MMapBuffer) Int2ObjectLinkedOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap) IOException(java.io.IOException) IntPair(com.linkedin.pinot.common.utils.Pairs.IntPair)
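
The grouping above relies on two properties of Int2ObjectLinkedOpenHashMap: keys are primitive ints (no boxing on put or lookup), and iteration follows insertion order, so the value ranges come back in the order they appear in the sorted column. Below is a minimal, self-contained sketch of the same run-length grouping over an in-memory array; it is not Pinot code, and it uses a plain int[] pair instead of Pinot's IntPair so it runs with only fastutil on the classpath.

import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;

public class RangeMapDemo {
    public static void main(String[] args) {
        // Column values for docs 0..5, already sorted so equal values are adjacent.
        int[] column = {7, 7, 3, 3, 3, 9};
        Int2ObjectMap<int[]> rangeMap = new Int2ObjectLinkedOpenHashMap<>();
        int prevValue = column[0];
        int prevStart = 0;
        for (int i = 1; i < column.length; i++) {
            if (column[i] != prevValue) {
                // Close the previous run: inclusive start, exclusive end.
                rangeMap.put(prevValue, new int[] { prevStart, i });
                prevValue = column[i];
                prevStart = i;
            }
        }
        rangeMap.put(prevValue, new int[] { prevStart, column.length });
        // Entries iterate in insertion order; getIntKey() avoids boxing.
        for (Int2ObjectMap.Entry<int[]> e : rangeMap.int2ObjectEntrySet()) {
            System.out.println(e.getIntKey() + " -> [" + e.getValue()[0] + ", " + e.getValue()[1] + ")");
        }
    }
}

This prints 7 -> [0, 2), 3 -> [2, 5), 9 -> [5, 6): the same inclusive-start, exclusive-end ranges groupByIntColumnCount returns, without the sentinel and MMapBuffer handling the real method needs.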

Example 2 with Int2ObjectLinkedOpenHashMap

Use of it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap in project druid by druid-io.

From the class SeekableStreamSupervisor, the method updatePartitionDataFromStream:

private boolean updatePartitionDataFromStream() {
    List<PartitionIdType> previousPartitionIds = new ArrayList<>(partitionIds);
    Set<PartitionIdType> partitionIdsFromSupplier;
    recordSupplierLock.lock();
    try {
        partitionIdsFromSupplier = recordSupplier.getPartitionIds(ioConfig.getStream());
        if (shouldSkipIgnorablePartitions()) {
            partitionIdsFromSupplier.removeAll(computeIgnorablePartitionIds());
        }
    } catch (Exception e) {
        stateManager.recordThrowableEvent(e);
        log.warn("Could not fetch partitions for topic/stream [%s]: %s", ioConfig.getStream(), e.getMessage());
        log.debug(e, "full stack trace");
        return false;
    } finally {
        recordSupplierLock.unlock();
    }
    if (partitionIdsFromSupplier == null || partitionIdsFromSupplier.size() == 0) {
        String errMsg = StringUtils.format("No partitions found for stream [%s]", ioConfig.getStream());
        stateManager.recordThrowableEvent(new StreamException(new ISE(errMsg)));
        log.warn(errMsg);
        return false;
    }
    log.debug("Found [%d] partitions for stream [%s]", partitionIdsFromSupplier.size(), ioConfig.getStream());
    Map<PartitionIdType, SequenceOffsetType> storedMetadata = getOffsetsFromMetadataStorage();
    Set<PartitionIdType> storedPartitions = storedMetadata.keySet();
    Set<PartitionIdType> closedPartitions = storedMetadata.entrySet().stream().filter(x -> isEndOfShard(x.getValue())).map(Entry::getKey).collect(Collectors.toSet());
    Set<PartitionIdType> previouslyExpiredPartitions = storedMetadata.entrySet().stream().filter(x -> isShardExpirationMarker(x.getValue())).map(Entry::getKey).collect(Collectors.toSet());
    Set<PartitionIdType> partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions = Sets.difference(partitionIdsFromSupplier, previouslyExpiredPartitions);
    Set<PartitionIdType> activePartitionsIdsFromSupplier = Sets.difference(partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions, closedPartitions);
    Set<PartitionIdType> newlyClosedPartitions = Sets.intersection(closedPartitions, new HashSet<>(previousPartitionIds));
    log.debug("active partitions from supplier: " + activePartitionsIdsFromSupplier);
    if (partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions.size() != partitionIdsFromSupplier.size()) {
        // this should never happen, but we check for it and exclude the expired partitions if they somehow reappear
        log.warn("Previously expired partitions [%s] were present in the current list [%s] from the record supplier.", previouslyExpiredPartitions, partitionIdsFromSupplier);
    }
    if (activePartitionsIdsFromSupplier.size() == 0) {
        String errMsg = StringUtils.format("No active partitions found for stream [%s] after removing closed and previously expired partitions", ioConfig.getStream());
        stateManager.recordThrowableEvent(new StreamException(new ISE(errMsg)));
        log.warn(errMsg);
        return false;
    }
    boolean initialPartitionDiscovery = this.partitionIds.isEmpty();
    for (PartitionIdType partitionId : partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions) {
        if (closedPartitions.contains(partitionId)) {
            log.info("partition [%s] is closed and has no more data, skipping.", partitionId);
            continue;
        }
        if (!this.partitionIds.contains(partitionId)) {
            partitionIds.add(partitionId);
            if (!initialPartitionDiscovery) {
                subsequentlyDiscoveredPartitions.add(partitionId);
            }
        }
    }
    // Clean up stored metadata and partition groups for closed and expired partitions
    // so they are not redistributed across tasks.
    if (supportsPartitionExpiration()) {
        cleanupClosedAndExpiredPartitions(storedPartitions, newlyClosedPartitions, activePartitionsIdsFromSupplier, previouslyExpiredPartitions, partitionIdsFromSupplier);
    }
    Int2ObjectMap<List<PartitionIdType>> newlyDiscovered = new Int2ObjectLinkedOpenHashMap<>();
    for (PartitionIdType partitionId : activePartitionsIdsFromSupplier) {
        int taskGroupId = getTaskGroupIdForPartition(partitionId);
        Set<PartitionIdType> partitionGroup = partitionGroups.computeIfAbsent(taskGroupId, k -> new HashSet<>());
        partitionGroup.add(partitionId);
        if (partitionOffsets.putIfAbsent(partitionId, getNotSetMarker()) == null) {
            log.debug("New partition [%s] discovered for stream [%s], added to task group [%d]", partitionId, ioConfig.getStream(), taskGroupId);
            newlyDiscovered.computeIfAbsent(taskGroupId, k -> new ArrayList<>()).add(partitionId);
        }
    }
    if (newlyDiscovered.size() > 0) {
        for (Int2ObjectMap.Entry<List<PartitionIdType>> taskGroupPartitions : newlyDiscovered.int2ObjectEntrySet()) {
            log.info("New partitions %s discovered for stream [%s], added to task group [%s]", taskGroupPartitions.getValue(), ioConfig.getStream(), taskGroupPartitions.getIntKey());
        }
    }
    if (!partitionIds.equals(previousPartitionIds)) {
        assignRecordSupplierToPartitionIds();
        // The set of partitions has changed; ask any running tasks to stop early so we can adjust to the
        // repartitioning quickly by creating new tasks
        for (TaskGroup taskGroup : activelyReadingTaskGroups.values()) {
            if (!taskGroup.taskIds().isEmpty()) {
                // Partitions have changed and we are managing active tasks - set an early publish time
                // at the current time + repartitionTransitionDuration.
                // This allows time for the stream to start writing to the new partitions after repartitioning.
                // For Kinesis ingestion, this cooldown time is particularly useful, lowering the possibility of
                // the new shards being empty, which can cause issues presently
                // (see https://github.com/apache/druid/issues/7600)
                earlyStopTime = DateTimes.nowUtc().plus(tuningConfig.getRepartitionTransitionDuration());
                log.info("Previous partition set [%s] has changed to [%s] - requesting that tasks stop after [%s] at [%s]", previousPartitionIds, partitionIds, tuningConfig.getRepartitionTransitionDuration(), earlyStopTime);
                break;
            }
        }
    }
    return true;
}
Also used : SeekableStreamIndexTask(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTask) Pair(org.apache.druid.java.util.common.Pair) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) TaskQueue(org.apache.druid.indexing.overlord.TaskQueue) Optional(com.google.common.base.Optional) TaskRunner(org.apache.druid.indexing.overlord.TaskRunner) Duration(java.time.Duration) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) Execs(org.apache.druid.java.util.common.concurrent.Execs) SeekableStreamDataSourceMetadata(org.apache.druid.indexing.seekablestream.SeekableStreamDataSourceMetadata) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) NotNull(javax.validation.constraints.NotNull) Int2ObjectLinkedOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap) LagStats(org.apache.druid.indexing.overlord.supervisor.autoscaler.LagStats) TaskState(org.apache.druid.indexer.TaskState) Stream(java.util.stream.Stream) Predicate(com.google.common.base.Predicate) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) TaskMaster(org.apache.druid.indexing.overlord.TaskMaster) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) Joiner(com.google.common.base.Joiner) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Iterables(com.google.common.collect.Iterables) SupervisorStateManager(org.apache.druid.indexing.overlord.supervisor.SupervisorStateManager) Callable(java.util.concurrent.Callable) TaskStatus(org.apache.druid.indexer.TaskStatus) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) EntryExistsException(org.apache.druid.metadata.EntryExistsException) SeekableStreamIndexTaskIOConfig(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskIOConfig) StringComparators(org.apache.druid.query.ordering.StringComparators) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) BiConsumer(java.util.function.BiConsumer) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AutoScalerConfig(org.apache.druid.indexing.seekablestream.supervisor.autoscaler.AutoScalerConfig) RetryUtils(org.apache.druid.java.util.common.RetryUtils) SeekableStreamIndexTaskClientFactory(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskClientFactory) Nullable(javax.annotation.Nullable) SeekableStreamIndexTaskTuningConfig(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskTuningConfig) SeekableStreamIndexTaskClient(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskClient) BlockingDeque(java.util.concurrent.BlockingDeque) ServiceMetricEvent(org.apache.druid.java.util.emitter.service.ServiceMetricEvent) TaskLocation(org.apache.druid.indexer.TaskLocation) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Futures(com.google.common.util.concurrent.Futures) TaskInfoProvider(org.apache.druid.indexing.common.TaskInfoProvider) TreeMap(java.util.TreeMap) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) SupervisorReport(org.apache.druid.indexing.overlord.supervisor.SupervisorReport) Preconditions(com.google.common.base.Preconditions) DataSchema(org.apache.druid.segment.indexing.DataSchema) SeekableStreamSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamSequenceNumbers) 
StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) TimeoutException(java.util.concurrent.TimeoutException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Task(org.apache.druid.indexing.common.task.Task) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DateTimes(org.apache.druid.java.util.common.DateTimes) Function(com.google.common.base.Function) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) OrderedSequenceNumber(org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber) StreamException(org.apache.druid.indexing.seekablestream.common.StreamException) List(java.util.List) MetadataSupervisorManager(org.apache.druid.metadata.MetadataSupervisorManager) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) IndexerMetadataStorageCoordinator(org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator) Entry(java.util.Map.Entry) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) SortedMap(java.util.SortedMap) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) HashMap(java.util.HashMap) HashSet(java.util.HashSet) MapperFeature(com.fasterxml.jackson.databind.MapperFeature) ImmutableList(com.google.common.collect.ImmutableList) IndexTaskClient(org.apache.druid.indexing.common.IndexTaskClient) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) ExecutorService(java.util.concurrent.ExecutorService) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TimeUnit(java.util.concurrent.TimeUnit) TaskRunnerWorkItem(org.apache.druid.indexing.overlord.TaskRunnerWorkItem) VisibleForTesting(com.google.common.annotations.VisibleForTesting) DigestUtils(org.apache.commons.codec.digest.DigestUtils) Supervisor(org.apache.druid.indexing.overlord.supervisor.Supervisor) Comparator(java.util.Comparator) Collections(java.util.Collections) SeekableStreamIndexTaskRunner(org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskRunner)
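
The supervisor keys its newlyDiscovered map by task-group id with an Int2ObjectLinkedOpenHashMap so the "New partitions discovered" log lines come out in a stable first-seen order, with unboxed int keys. The sketch below reproduces just that grouping step in isolation; it is not Druid code: PartitionGroupingDemo, the shard ids, and the hash-based stand-in for getTaskGroupIdForPartition are all hypothetical, and the explicit get/put is equivalent to the computeIfAbsent call in the example above.

import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import java.util.ArrayList;
import java.util.List;

public class PartitionGroupingDemo {
    public static void main(String[] args) {
        // Hypothetical partition ids; a real supervisor gets these from the record supplier.
        List<String> partitionIds = List.of("shard-0", "shard-1", "shard-2", "shard-3");
        int taskCount = 2;
        Int2ObjectMap<List<String>> newlyDiscovered = new Int2ObjectLinkedOpenHashMap<>();
        for (String partitionId : partitionIds) {
            // Stand-in for getTaskGroupIdForPartition(partitionId): hash the id into a group.
            int taskGroupId = Math.abs(partitionId.hashCode() % taskCount);
            List<String> group = newlyDiscovered.get(taskGroupId);
            if (group == null) {
                group = new ArrayList<>();
                newlyDiscovered.put(taskGroupId, group);
            }
            group.add(partitionId);
        }
        // int2ObjectEntrySet() yields entries in insertion order with unboxed keys,
        // mirroring the logging loop in updatePartitionDataFromStream.
        for (Int2ObjectMap.Entry<List<String>> e : newlyDiscovered.int2ObjectEntrySet()) {
            System.out.println("task group " + e.getIntKey() + " -> " + e.getValue());
        }
    }
}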

Aggregations

Int2ObjectLinkedOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap)2
IOException (java.io.IOException)2
JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)1
MapperFeature (com.fasterxml.jackson.databind.MapperFeature)1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1
VisibleForTesting (com.google.common.annotations.VisibleForTesting)1
Function (com.google.common.base.Function)1
Joiner (com.google.common.base.Joiner)1
Optional (com.google.common.base.Optional)1
Preconditions (com.google.common.base.Preconditions)1
Predicate (com.google.common.base.Predicate)1
ImmutableList (com.google.common.collect.ImmutableList)1
ImmutableMap (com.google.common.collect.ImmutableMap)1
ImmutableSet (com.google.common.collect.ImmutableSet)1
Iterables (com.google.common.collect.Iterables)1
Sets (com.google.common.collect.Sets)1
Futures (com.google.common.util.concurrent.Futures)1
ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService)1
MoreExecutors (com.google.common.util.concurrent.MoreExecutors)1