Use of io.trino.array.IntBigArray in project trino by trinodb.
The class InMemoryHashAggregationBuilder, method hashSortedGroupIds:
private IntIterator hashSortedGroupIds()
{
    IntBigArray groupIds = new IntBigArray();
    groupIds.ensureCapacity(groupByHash.getGroupCount());
    for (int i = 0; i < groupByHash.getGroupCount(); i++) {
        groupIds.set(i, i);
    }

    // sort the group ids by their raw hash so groups are emitted in hash order
    groupIds.sort(0, groupByHash.getGroupCount(), (leftGroupId, rightGroupId) -> Long.compare(groupByHash.getRawHash(leftGroupId), groupByHash.getRawHash(rightGroupId)));

    // expose the sorted ids lazily, without copying them into another array
    return new AbstractIntIterator()
    {
        private final int totalPositions = groupByHash.getGroupCount();
        private int position;

        @Override
        public boolean hasNext()
        {
            return position < totalPositions;
        }

        @Override
        public int nextInt()
        {
            return groupIds.get(position++);
        }
    };
}
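The same sort-then-iterate pattern can be exercised on its own. Below is a minimal, hypothetical sketch (not Trino code) that fills an IntBigArray with identity indexes, sorts them by an external long[] key, and exposes the order through a fastutil IntIterator. The keys array and class name are illustrative assumptions; the sketch relies only on the sort(from, to, comparator) overload used above.

import io.trino.array.IntBigArray;
import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
import it.unimi.dsi.fastutil.ints.IntIterator;

public class SortedIdsExample
{
    // returns the indexes 0..keys.length-1 ordered by their key value
    public static IntIterator sortedIds(long[] keys)
    {
        int count = keys.length;
        IntBigArray ids = new IntBigArray();
        ids.ensureCapacity(count);
        for (int i = 0; i < count; i++) {
            ids.set(i, i);
        }

        // sort the ids by their key, mirroring hashSortedGroupIds() above
        ids.sort(0, count, (left, right) -> Long.compare(keys[left], keys[right]));

        return new AbstractIntIterator()
        {
            private int position;

            @Override
            public boolean hasNext()
            {
                return position < count;
            }

            @Override
            public int nextInt()
            {
                return ids.get(position++);
            }
        };
    }
}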
Use of io.trino.array.IntBigArray in project trino by trinodb.
The class SingleTypedHistogram, method rehash:
private void rehash()
{
    long newCapacityLong = hashCapacity * 2L;
    if (newCapacityLong > Integer.MAX_VALUE) {
        throw new TrinoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries");
    }
    int newCapacity = (int) newCapacityLong;

    int newMask = newCapacity - 1;
    IntBigArray newHashPositions = new IntBigArray(-1);
    newHashPositions.ensureCapacity(newCapacity);

    for (int i = 0; i < values.getPositionCount(); i++) {
        // find an empty slot for the address
        int hashPosition = getBucketId(hashCodeOperator.hashCodeNullSafe(values, i), newMask);
        while (newHashPositions.get(hashPosition) != -1) {
            hashPosition = (hashPosition + 1) & newMask;
        }

        // record the mapping
        newHashPositions.set(hashPosition, i);
    }

    hashCapacity = newCapacity;
    mask = newMask;
    maxFill = calculateMaxFill(newCapacity);
    hashPositions = newHashPositions;

    this.counts.ensureCapacity(maxFill);
}
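The -1 passed to the IntBigArray constructor is what makes the probing loop work: slots that have never been written read back as the initial value, so -1 doubles as the "empty bucket" marker. A small standalone sketch of that behavior (class name and values are made up for illustration):

import io.trino.array.IntBigArray;

public class EmptySlotExample
{
    public static void main(String[] args)
    {
        // every slot that has not been written yet reads back as the
        // initial value passed to the constructor, here -1
        IntBigArray hashPositions = new IntBigArray(-1);
        hashPositions.ensureCapacity(16);

        hashPositions.set(3, 42);

        System.out.println(hashPositions.get(3)); // 42
        System.out.println(hashPositions.get(7)); // -1, still "empty"
    }
}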
Use of io.trino.array.IntBigArray in project trino by trinodb.
The class ValueStore, method rehash:
@VisibleForTesting
void rehash()
{
    ++rehashCount;

    long newBucketCountLong = bucketCount * 2L;
    if (newBucketCountLong > Integer.MAX_VALUE) {
        throw new TrinoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed " + Integer.MAX_VALUE + " entries (" + newBucketCountLong + ")");
    }

    int newBucketCount = (int) newBucketCountLong;
    int newMask = newBucketCount - 1;
    IntBigArray newBuckets = new IntBigArray(-1);
    newBuckets.ensureCapacity(newBucketCount);

    for (int i = 0; i < values.getPositionCount(); i++) {
        long valueHash = valueHashes.get(i);
        int bucketId = getBucketId(valueHash, newMask);
        int probeCount = 1;
        while (newBuckets.get(bucketId) != EMPTY_BUCKET) {
            int probe = nextProbe(probeCount);
            bucketId = nextBucketId(bucketId, newMask, probe);
            probeCount++;
        }

        // record the mapping
        newBuckets.set(bucketId, i);
    }

    buckets = newBuckets;
    // worst case is every bucket has a unique value, so pre-emptively keep this large enough to have a value for every bucket
    // TODO: could optimize the growth algorithm to resize this only when necessary; this wastes memory but guarantees that if every value has a distinct hash, we have space
    valueHashes.ensureCapacity(newBucketCount);
    bucketCount = newBucketCount;
    maxFill = calculateMaxFill(newBucketCount, MAX_FILL_RATIO);
    mask = newMask;
}
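calculateMaxFill bounds how full the table may get before the next rehash. A hypothetical version of that helper is sketched below, assuming a simple ceil(bucketCount * fillRatio) with at least one bucket always left empty; the real Trino helper may differ in rounding and validation.

// Hypothetical helper illustrating the load-factor bound; Trino's actual
// calculateMaxFill may differ in details (argument checks, rounding, clamping).
private static int calculateMaxFill(int bucketCount, float maxFillRatio)
{
    int maxFill = (int) Math.ceil(bucketCount * maxFillRatio);
    if (maxFill == bucketCount) {
        // always leave at least one empty bucket so probing terminates
        maxFill--;
    }
    return maxFill;
}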
Use of io.trino.array.IntBigArray in project trino by trinodb.
The class GroupedTypedHistogram, method rehash:
private void rehash()
{
    long newBucketCountLong = bucketCount * 2L;
    if (newBucketCountLong > Integer.MAX_VALUE) {
        throw new TrinoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed " + Integer.MAX_VALUE + " entries (" + newBucketCountLong + ")");
    }

    int newBucketCount = computeBucketCount((int) newBucketCountLong, MAX_FILL_RATIO);
    int newMask = newBucketCount - 1;
    IntBigArray newBuckets = new IntBigArray(-1);
    newBuckets.ensureCapacity(newBucketCount);

    for (int i = 0; i < nextNodePointer; i++) {
        // find the old one
        int bucketId = getBucketIdForNode(i, newMask);
        int probeCount = 1;
        int originalBucket = bucketId;
        // find new one
        while (newBuckets.get(bucketId) != -1) {
            int probe = nextProbe(probeCount);
            bucketId = nextBucketId(originalBucket, newMask, probe);
            probeCount++;
        }

        // record the mapping
        newBuckets.set(bucketId, i);
    }

    buckets = newBuckets;
    bucketCount = newBucketCount;
    maxFill = calculateMaxFill(newBucketCount, MAX_FILL_RATIO);
    mask = newMask;

    resizeNodeArrays(newBucketCount);
}
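Unlike ValueStore above, this loop always probes from originalBucket with a growing offset. One plausible shape for the two helpers is sketched below, using simple linear probing from the original bucket; these are illustrative assumptions, not necessarily Trino's actual implementation.

// Illustrative probing helpers (assumed, not copied from Trino):
// probe offsets grow with the attempt count, and the next bucket is
// computed from the original bucket so the sequence stays deterministic.
private static int nextProbe(int probeCount)
{
    return probeCount;
}

private static int nextBucketId(int originalBucket, int mask, int probe)
{
    return (originalBucket + probe) & mask;
}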
Use of io.trino.array.IntBigArray in project trino by trinodb.
The class SliceDictionaryColumnWriter, method bufferOutputData:
private void bufferOutputData()
{
    checkState(closed);
    checkState(!directEncoded);

    Block dictionaryElements = dictionary.getElementBlock();

    // write dictionary in sorted order
    int[] sortedDictionaryIndexes = getSortedDictionaryNullsLast(dictionaryElements);
    for (int sortedDictionaryIndex : sortedDictionaryIndexes) {
        if (!dictionaryElements.isNull(sortedDictionaryIndex)) {
            int length = dictionaryElements.getSliceLength(sortedDictionaryIndex);
            dictionaryLengthStream.writeLong(length);
            Slice value = dictionaryElements.getSlice(sortedDictionaryIndex, 0, length);
            dictionaryDataStream.writeSlice(value);
        }
    }
    columnEncoding = new ColumnEncoding(DICTIONARY_V2, dictionaryElements.getPositionCount() - 1);

    // build index from original dictionary index to new sorted position
    int[] originalDictionaryToSortedIndex = new int[sortedDictionaryIndexes.length];
    for (int sortOrdinal = 0; sortOrdinal < sortedDictionaryIndexes.length; sortOrdinal++) {
        int dictionaryIndex = sortedDictionaryIndexes[sortOrdinal];
        originalDictionaryToSortedIndex[dictionaryIndex] = sortOrdinal;
    }

    if (!rowGroups.isEmpty()) {
        presentStream.recordCheckpoint();
        dataStream.recordCheckpoint();
    }
    for (DictionaryRowGroup rowGroup : rowGroups) {
        IntBigArray dictionaryIndexes = rowGroup.getDictionaryIndexes();
        for (int position = 0; position < rowGroup.getValueCount(); position++) {
            presentStream.writeBoolean(dictionaryIndexes.get(position) != 0);
        }
        for (int position = 0; position < rowGroup.getValueCount(); position++) {
            int originalDictionaryIndex = dictionaryIndexes.get(position);
            // index zero in original dictionary is reserved for null
            if (originalDictionaryIndex != 0) {
                int sortedIndex = originalDictionaryToSortedIndex[originalDictionaryIndex];
                if (sortedIndex < 0) {
                    throw new IllegalArgumentException();
                }
                dataStream.writeLong(sortedIndex);
            }
        }

        presentStream.recordCheckpoint();
        dataStream.recordCheckpoint();
    }

    // free the dictionary memory
    dictionary.clear();

    dictionaryDataStream.close();
    dictionaryLengthStream.close();

    dataStream.close();
    presentStream.close();
}
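The remapping step in the middle of this method is easy to miss: sortedDictionaryIndexes is inverted so that each row's original dictionary index can be translated into its position in the sorted dictionary. A tiny self-contained illustration with made-up data (not ORC writer code):

public class DictionaryRemapExample
{
    public static void main(String[] args)
    {
        // suppose sorting the dictionary produced this order of original indexes
        int[] sortedDictionaryIndexes = {2, 0, 3, 1};

        // invert it: originalDictionaryToSortedIndex[original] = position in sorted order
        int[] originalDictionaryToSortedIndex = new int[sortedDictionaryIndexes.length];
        for (int sortOrdinal = 0; sortOrdinal < sortedDictionaryIndexes.length; sortOrdinal++) {
            originalDictionaryToSortedIndex[sortedDictionaryIndexes[sortOrdinal]] = sortOrdinal;
        }

        // a row that referenced original dictionary entry 3 now writes sorted position 2
        System.out.println(originalDictionaryToSortedIndex[3]); // 2
    }
}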