Use of it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap in project pinot (by LinkedIn).
From class StarTreeDataTable, method groupByIntColumnCount:
/**
 * Groups consecutive documents in the range [startDocId, endDocId) by the value of the given
 * int column, returning the contiguous doc-id range occupied by each distinct value.
 * <p>
 * NOTE(review): assumes the column values are sorted (or at least run-length grouped) within
 * the scanned range, so each value occupies exactly one contiguous run — TODO confirm against
 * callers; a repeated value in non-adjacent runs would overwrite its earlier range entry.
 *
 * @param startDocId inclusive start document id
 * @param endDocId exclusive end document id
 * @param colIndex index of the int column to group by
 * @return map from column value to (start, end) doc-id pair — start inclusive, end exclusive.
 *         Returns an empty map when the range is empty or the file cannot be mapped.
 */
public Int2ObjectMap<IntPair> groupByIntColumnCount(int startDocId, int endDocId, Integer colIndex) {
  int length = endDocId - startDocId;
  if (length <= 0) {
    // Empty (or inverted) range: nothing to group. Without this guard the original code
    // mapped a zero-length buffer and emitted a bogus entry keyed by the -1 sentinel.
    return EMPTY_INT_OBJECT_MAP;
  }
  MMapBuffer mappedByteBuffer = null;
  try {
    Int2ObjectMap<IntPair> rangeMap = new Int2ObjectLinkedOpenHashMap<>();
    final long startOffset = startDocId * (long) totalSizeInBytes;
    mappedByteBuffer = new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes, MMapMode.READ_WRITE);
    // Loop-invariant byte offset of the column within each fixed-size record.
    final long colOffset = colIndex * (long) V1Constants.Numbers.INTEGER_SIZE;
    // Track the current run with an explicit flag instead of a -1 sentinel value, which
    // would mis-group documents whose column value is legitimately -1.
    boolean hasPrev = false;
    int prevValue = 0;
    int prevStart = 0;
    for (int i = 0; i < length; i++) {
      int value = flipEndiannessIfNeeded(mappedByteBuffer.getInt((i * (long) totalSizeInBytes) + colOffset));
      if (hasPrev && prevValue != value) {
        // Value changed: close the previous run [startDocId + prevStart, startDocId + i).
        rangeMap.put(prevValue, new IntPair(startDocId + prevStart, startDocId + i));
        prevStart = i;
      }
      prevValue = value;
      hasPrev = true;
    }
    // Close the final run, which extends to the exclusive end of the scanned range.
    rangeMap.put(prevValue, new IntPair(startDocId + prevStart, endDocId));
    return rangeMap;
  } catch (IOException e) {
    // NOTE(review): swallowing the exception and returning an empty map hides I/O failures
    // from callers; preserved for interface compatibility, but a proper logger (and/or
    // rethrowing as an unchecked exception) would be preferable to printStackTrace().
    e.printStackTrace();
  } finally {
    if (mappedByteBuffer != null) {
      try {
        mappedByteBuffer.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
  return EMPTY_INT_OBJECT_MAP;
}
Use of it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap in project druid (by druid-io).
From class SeekableStreamSupervisor, method updatePartitionDataFromStream:
/**
 * Refreshes this supervisor's view of the stream's partitions from the record supplier and
 * reconciles it with partition metadata from storage: filters out ignorable, closed, and
 * previously-expired partitions, registers newly discovered partitions into task groups,
 * and — when the partition set has changed — schedules an early stop of actively reading
 * tasks so they can be re-created against the new partition layout.
 *
 * @return {@code true} if partition data was refreshed successfully; {@code false} if the
 *         partition list could not be fetched or no active partitions remain.
 */
private boolean updatePartitionDataFromStream() {
// Snapshot the previous partition list so we can detect repartitioning at the end.
List<PartitionIdType> previousPartitionIds = new ArrayList<>(partitionIds);
Set<PartitionIdType> partitionIdsFromSupplier;
// Serialize access to the shared record supplier while fetching partition ids.
recordSupplierLock.lock();
try {
partitionIdsFromSupplier = recordSupplier.getPartitionIds(ioConfig.getStream());
if (shouldSkipIgnorablePartitions()) {
// NOTE(review): mutates the set returned by the supplier — assumes it is mutable.
partitionIdsFromSupplier.removeAll(computeIgnorablePartitionIds());
}
} catch (Exception e) {
// A fetch failure is non-fatal: record it and let the next run cycle retry.
stateManager.recordThrowableEvent(e);
log.warn("Could not fetch partitions for topic/stream [%s]: %s", ioConfig.getStream(), e.getMessage());
log.debug(e, "full stack trace");
return false;
} finally {
recordSupplierLock.unlock();
}
// A stream with no partitions at all is treated as an error condition, not an empty update.
if (partitionIdsFromSupplier == null || partitionIdsFromSupplier.size() == 0) {
String errMsg = StringUtils.format("No partitions found for stream [%s]", ioConfig.getStream());
stateManager.recordThrowableEvent(new StreamException(new ISE(errMsg)));
log.warn(errMsg);
return false;
}
log.debug("Found [%d] partitions for stream [%s]", partitionIdsFromSupplier.size(), ioConfig.getStream());
// Stored offsets carry special marker values that flag closed or expired shards.
Map<PartitionIdType, SequenceOffsetType> storedMetadata = getOffsetsFromMetadataStorage();
Set<PartitionIdType> storedPartitions = storedMetadata.keySet();
// Partitions whose stored offset is an end-of-shard marker: no more data will arrive.
Set<PartitionIdType> closedPartitions = storedMetadata.entrySet().stream().filter(x -> isEndOfShard(x.getValue())).map(Entry::getKey).collect(Collectors.toSet());
// Partitions whose stored offset marks them as already expired and cleaned up.
Set<PartitionIdType> previouslyExpiredPartitions = storedMetadata.entrySet().stream().filter(x -> isShardExpirationMarker(x.getValue())).map(Entry::getKey).collect(Collectors.toSet());
// Current partitions minus previously-expired ones (which should never reappear)...
Set<PartitionIdType> partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions = Sets.difference(partitionIdsFromSupplier, previouslyExpiredPartitions);
// ...and further minus closed ones: the partitions that tasks should actually read.
Set<PartitionIdType> activePartitionsIdsFromSupplier = Sets.difference(partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions, closedPartitions);
// Closed partitions that we were still tracking in the previous cycle.
Set<PartitionIdType> newlyClosedPartitions = Sets.intersection(closedPartitions, new HashSet<>(previousPartitionIds));
log.debug("active partitions from supplier: " + activePartitionsIdsFromSupplier);
if (partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions.size() != partitionIdsFromSupplier.size()) {
// this should never happen, but we check for it and exclude the expired partitions if they somehow reappear
log.warn("Previously expired partitions [%s] were present in the current list [%s] from the record supplier.", previouslyExpiredPartitions, partitionIdsFromSupplier);
}
// Having partitions but none active (all closed/expired) is also an error condition.
if (activePartitionsIdsFromSupplier.size() == 0) {
String errMsg = StringUtils.format("No active partitions found for stream [%s] after removing closed and previously expired partitions", ioConfig.getStream());
stateManager.recordThrowableEvent(new StreamException(new ISE(errMsg)));
log.warn(errMsg);
return false;
}
// On the very first discovery, partitions are not recorded as "subsequently discovered".
boolean initialPartitionDiscovery = this.partitionIds.isEmpty();
for (PartitionIdType partitionId : partitionIdsFromSupplierWithoutPreviouslyExpiredPartitions) {
if (closedPartitions.contains(partitionId)) {
log.info("partition [%s] is closed and has no more data, skipping.", partitionId);
continue;
}
// Track any partition we have not seen before.
if (!this.partitionIds.contains(partitionId)) {
partitionIds.add(partitionId);
if (!initialPartitionDiscovery) {
subsequentlyDiscoveredPartitions.add(partitionId);
}
}
}
// If supported, clean up stored state for closed/expired partitions, which may redistribute
// partitions across tasks.
if (supportsPartitionExpiration()) {
cleanupClosedAndExpiredPartitions(storedPartitions, newlyClosedPartitions, activePartitionsIdsFromSupplier, previouslyExpiredPartitions, partitionIdsFromSupplier);
}
// Assign each active partition to a task group; collect first-seen partitions per group
// (putIfAbsent returning null means no offset was tracked yet) for logging below.
Int2ObjectMap<List<PartitionIdType>> newlyDiscovered = new Int2ObjectLinkedOpenHashMap<>();
for (PartitionIdType partitionId : activePartitionsIdsFromSupplier) {
int taskGroupId = getTaskGroupIdForPartition(partitionId);
Set<PartitionIdType> partitionGroup = partitionGroups.computeIfAbsent(taskGroupId, k -> new HashSet<>());
partitionGroup.add(partitionId);
if (partitionOffsets.putIfAbsent(partitionId, getNotSetMarker()) == null) {
log.debug("New partition [%s] discovered for stream [%s], added to task group [%d]", partitionId, ioConfig.getStream(), taskGroupId);
newlyDiscovered.computeIfAbsent(taskGroupId, k -> new ArrayList<>()).add(partitionId);
}
}
if (newlyDiscovered.size() > 0) {
for (Int2ObjectMap.Entry<List<PartitionIdType>> taskGroupPartitions : newlyDiscovered.int2ObjectEntrySet()) {
log.info("New partitions %s discovered for stream [%s], added to task group [%s]", taskGroupPartitions.getValue(), ioConfig.getStream(), taskGroupPartitions.getIntKey());
}
}
// Partition set changed relative to the start of this call: handle repartitioning.
if (!partitionIds.equals(previousPartitionIds)) {
assignRecordSupplierToPartitionIds();
// If any task group is actively reading, request an early stop so we transition to the
// new partition layout quickly by creating new tasks.
for (TaskGroup taskGroup : activelyReadingTaskGroups.values()) {
if (!taskGroup.taskIds().isEmpty()) {
// Partitions have changed and we are managing active tasks - set an early publish time
// at the current time + repartitionTransitionDuration.
// This allows time for the stream to start writing to the new partitions after repartitioning.
// For Kinesis ingestion, this cooldown time is particularly useful, lowering the possibility of
// the new shards being empty, which can cause issues presently
// (see https://github.com/apache/druid/issues/7600)
earlyStopTime = DateTimes.nowUtc().plus(tuningConfig.getRepartitionTransitionDuration());
log.info("Previous partition set [%s] has changed to [%s] - requesting that tasks stop after [%s] at [%s]", previousPartitionIds, partitionIds, tuningConfig.getRepartitionTransitionDuration(), earlyStopTime);
break;
}
}
}
return true;
}
Aggregations