
Example 1 with IntBigArray

Use of io.trino.array.IntBigArray in project trino by trinodb.

From class InMemoryHashAggregationBuilder, method hashSortedGroupIds:

private IntIterator hashSortedGroupIds() {
    IntBigArray groupIds = new IntBigArray();
    groupIds.ensureCapacity(groupByHash.getGroupCount());
    // start from the identity mapping: slot i holds group id i
    for (int i = 0; i < groupByHash.getGroupCount(); i++) {
        groupIds.set(i, i);
    }
    // reorder the group ids by their raw hash so iteration proceeds in hash order
    groupIds.sort(0, groupByHash.getGroupCount(), (leftGroupId, rightGroupId) ->
            Long.compare(groupByHash.getRawHash(leftGroupId), groupByHash.getRawHash(rightGroupId)));
    return new AbstractIntIterator() {

        private final int totalPositions = groupByHash.getGroupCount();

        private int position;

        @Override
        public boolean hasNext() {
            return position < totalPositions;
        }

        @Override
        public int nextInt() {
            return groupIds.get(position++);
        }
    };
}
Also used: AbstractIntIterator (it.unimi.dsi.fastutil.ints.AbstractIntIterator), IntBigArray (io.trino.array.IntBigArray)
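
IntBigArray reads like a growable, long-indexed int[]: ensureCapacity sizes it, set and get address individual slots, and sort orders a range with a comparator. A minimal sketch of that usage pattern, assuming only the methods visible in the snippet above (the class name and data are invented for illustration):

import io.trino.array.IntBigArray;

public class IntBigArraySketch {
    public static void main(String[] args) {
        int count = 5;
        IntBigArray ids = new IntBigArray();   // slots default to 0
        ids.ensureCapacity(count);             // reserve room before writing
        for (int i = 0; i < count; i++) {
            ids.set(i, count - i);             // store 5, 4, 3, 2, 1
        }
        // sort the range [0, count) with an explicit comparator, as in the example
        ids.sort(0, count, (left, right) -> Integer.compare(left, right));
        for (int i = 0; i < count; i++) {
            System.out.println(ids.get(i));    // prints 1 through 5
        }
    }
}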

Example 2 with IntBigArray

Use of io.trino.array.IntBigArray in project trino by trinodb.

From class SingleTypedHistogram, method rehash:

private void rehash() {
    long newCapacityLong = hashCapacity * 2L;
    if (newCapacityLong > Integer.MAX_VALUE) {
        throw new TrinoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries");
    }
    int newCapacity = (int) newCapacityLong;
    int newMask = newCapacity - 1;
    // a fill value of -1 marks a bucket as empty
    IntBigArray newHashPositions = new IntBigArray(-1);
    newHashPositions.ensureCapacity(newCapacity);
    for (int i = 0; i < values.getPositionCount(); i++) {
        // find an empty slot for the address
        int hashPosition = getBucketId(hashCodeOperator.hashCodeNullSafe(values, i), newMask);
        while (newHashPositions.get(hashPosition) != -1) {
            hashPosition = (hashPosition + 1) & newMask;
        }
        // record the mapping
        newHashPositions.set(hashPosition, i);
    }
    hashCapacity = newCapacity;
    mask = newMask;
    maxFill = calculateMaxFill(newCapacity);
    hashPositions = newHashPositions;
    this.counts.ensureCapacity(maxFill);
}
Also used: IntBigArray (io.trino.array.IntBigArray), TrinoException (io.trino.spi.TrinoException)
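
The rehash above depends on the capacity being a power of two, so the bucket index is computed with a bit mask instead of a modulo, and linear probing wraps around for free. A self-contained sketch of the same pattern on plain arrays (the method name and inputs are invented for illustration):

import java.util.Arrays;

public class RehashSketch {
    // rebuild the open-addressed table for values 0..valueCount-1, as in the snippet above
    static int[] rebuildTable(int valueCount, long[] valueHashes, int newCapacity) {
        int newMask = newCapacity - 1;               // valid only for power-of-two capacities
        int[] newPositions = new int[newCapacity];
        Arrays.fill(newPositions, -1);               // -1 marks an empty bucket
        for (int i = 0; i < valueCount; i++) {
            int position = (int) (valueHashes[i] & newMask);   // cheap "hash mod capacity"
            while (newPositions[position] != -1) {
                position = (position + 1) & newMask;           // linear probe, wrapping at the end
            }
            newPositions[position] = i;              // record the value's bucket
        }
        return newPositions;
    }
}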

Example 3 with IntBigArray

Use of io.trino.array.IntBigArray in project trino by trinodb.

From class ValueStore, method rehash:

@VisibleForTesting
void rehash() {
    ++rehashCount;
    long newBucketCountLong = bucketCount * 2L;
    if (newBucketCountLong > Integer.MAX_VALUE) {
        throw new TrinoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed " + Integer.MAX_VALUE + " entries (" + newBucketCountLong + ")");
    }
    int newBucketCount = (int) newBucketCountLong;
    int newMask = newBucketCount - 1;
    IntBigArray newBuckets = new IntBigArray(-1);
    newBuckets.ensureCapacity(newBucketCount);
    for (int i = 0; i < values.getPositionCount(); i++) {
        long valueHash = valueHashes.get(i);
        int bucketId = getBucketId(valueHash, newMask);
        int probeCount = 1;
        while (newBuckets.get(bucketId) != EMPTY_BUCKET) {
            int probe = nextProbe(probeCount);
            bucketId = nextBucketId(bucketId, newMask, probe);
            probeCount++;
        }
        // record the mapping
        newBuckets.set(bucketId, i);
    }
    buckets = newBuckets;
    // worst case is every bucket has a unique value, so preemptively keep this large enough to have a value for every bucket
    // TODO: could optimize the growth algorithm to resize this only when necessary; this wastes memory but guarantees that if every value has a distinct hash, we have space
    valueHashes.ensureCapacity(newBucketCount);
    bucketCount = newBucketCount;
    maxFill = calculateMaxFill(newBucketCount, MAX_FILL_RATIO);
    mask = newMask;
}
Also used: IntBigArray (io.trino.array.IntBigArray), TrinoException (io.trino.spi.TrinoException), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
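
Unlike the linear probing in Example 2, this rehash delegates the step size to nextProbe and nextBucketId, whose bodies are not shown here. One common scheme that fits this call pattern is triangular probing, where the step grows by one on each collision; the helpers below are hypothetical stand-ins for illustration, not the actual Trino implementations:

// hypothetical stand-ins for the nextProbe/nextBucketId helpers used above
public class ProbeSketch {
    static int nextProbe(int probeCount) {
        return probeCount;                // the step grows by 1 on every collision
    }

    static int nextBucketId(int bucketId, int mask, int probe) {
        // with the chaining call in the snippet above, cumulative offsets from the home
        // bucket become triangular numbers (1, 3, 6, 10, ...), which visit every bucket
        // when the table size is a power of two
        return (bucketId + probe) & mask;
    }
}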

Example 4 with IntBigArray

Use of io.trino.array.IntBigArray in project trino by trinodb.

From class GroupedTypedHistogram, method rehash:

private void rehash() {
    long newBucketCountLong = bucketCount * 2L;
    if (newBucketCountLong > Integer.MAX_VALUE) {
        throw new TrinoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed " + Integer.MAX_VALUE + " entries (" + newBucketCountLong + ")");
    }
    int newBucketCount = computeBucketCount((int) newBucketCountLong, MAX_FILL_RATIO);
    int newMask = newBucketCount - 1;
    IntBigArray newBuckets = new IntBigArray(-1);
    newBuckets.ensureCapacity(newBucketCount);
    for (int i = 0; i < nextNodePointer; i++) {
        // compute the node's home bucket in the resized table
        int bucketId = getBucketIdForNode(i, newMask);
        int probeCount = 1;
        int originalBucket = bucketId;
        // probe from the home bucket until an empty slot is found
        while (newBuckets.get(bucketId) != -1) {
            int probe = nextProbe(probeCount);
            bucketId = nextBucketId(originalBucket, newMask, probe);
            probeCount++;
        }
        // record the mapping
        newBuckets.set(bucketId, i);
    }
    buckets = newBuckets;
    bucketCount = newBucketCount;
    maxFill = calculateMaxFill(newBucketCount, MAX_FILL_RATIO);
    mask = newMask;
    resizeNodeArrays(newBucketCount);
}
Also used: IntBigArray (io.trino.array.IntBigArray), TrinoException (io.trino.spi.TrinoException)

Example 5 with IntBigArray

Use of io.trino.array.IntBigArray in project trino by trinodb.

From class SliceDictionaryColumnWriter, method bufferOutputData:

private void bufferOutputData() {
    checkState(closed);
    checkState(!directEncoded);
    Block dictionaryElements = dictionary.getElementBlock();
    // write dictionary in sorted order
    int[] sortedDictionaryIndexes = getSortedDictionaryNullsLast(dictionaryElements);
    for (int sortedDictionaryIndex : sortedDictionaryIndexes) {
        if (!dictionaryElements.isNull(sortedDictionaryIndex)) {
            int length = dictionaryElements.getSliceLength(sortedDictionaryIndex);
            dictionaryLengthStream.writeLong(length);
            Slice value = dictionaryElements.getSlice(sortedDictionaryIndex, 0, length);
            dictionaryDataStream.writeSlice(value);
        }
    }
    // dictionary size excludes the entry at index zero, which is reserved for null
    columnEncoding = new ColumnEncoding(DICTIONARY_V2, dictionaryElements.getPositionCount() - 1);
    // build index from original dictionary index to new sorted position
    int[] originalDictionaryToSortedIndex = new int[sortedDictionaryIndexes.length];
    for (int sortOrdinal = 0; sortOrdinal < sortedDictionaryIndexes.length; sortOrdinal++) {
        int dictionaryIndex = sortedDictionaryIndexes[sortOrdinal];
        originalDictionaryToSortedIndex[dictionaryIndex] = sortOrdinal;
    }
    if (!rowGroups.isEmpty()) {
        presentStream.recordCheckpoint();
        dataStream.recordCheckpoint();
    }
    for (DictionaryRowGroup rowGroup : rowGroups) {
        IntBigArray dictionaryIndexes = rowGroup.getDictionaryIndexes();
        for (int position = 0; position < rowGroup.getValueCount(); position++) {
            presentStream.writeBoolean(dictionaryIndexes.get(position) != 0);
        }
        for (int position = 0; position < rowGroup.getValueCount(); position++) {
            int originalDictionaryIndex = dictionaryIndexes.get(position);
            // index zero in original dictionary is reserved for null
            if (originalDictionaryIndex != 0) {
                int sortedIndex = originalDictionaryToSortedIndex[originalDictionaryIndex];
                if (sortedIndex < 0) {
                    throw new IllegalArgumentException();
                }
                dataStream.writeLong(sortedIndex);
            }
        }
        presentStream.recordCheckpoint();
        dataStream.recordCheckpoint();
    }
    // free the dictionary memory
    dictionary.clear();
    dictionaryDataStream.close();
    dictionaryLengthStream.close();
    dataStream.close();
    presentStream.close();
}
Also used: ColumnEncoding (io.trino.orc.metadata.ColumnEncoding), Slice (io.airlift.slice.Slice), IntBigArray (io.trino.array.IntBigArray), DictionaryBlock (io.trino.spi.block.DictionaryBlock), Block (io.trino.spi.block.Block), BooleanStreamCheckpoint (io.trino.orc.checkpoint.BooleanStreamCheckpoint), LongStreamCheckpoint (io.trino.orc.checkpoint.LongStreamCheckpoint)
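
The remapping step above hinges on inverting a permutation: sortedDictionaryIndexes maps a sorted position to an original dictionary index, and the loop flips it so each row's original index can be translated with a single array lookup. A tiny self-contained sketch of that inversion (the class name and data are invented):

public class InvertPermutationSketch {
    // sortedIndexes[sortOrdinal] = originalIndex  ==>  result[originalIndex] = sortOrdinal
    static int[] invertPermutation(int[] sortedIndexes) {
        int[] originalToSorted = new int[sortedIndexes.length];
        for (int sortOrdinal = 0; sortOrdinal < sortedIndexes.length; sortOrdinal++) {
            originalToSorted[sortedIndexes[sortOrdinal]] = sortOrdinal;
        }
        return originalToSorted;
    }

    public static void main(String[] args) {
        // if sorting placed original entries [2, 0, 1] in that order,
        // then original index 0 now lives at sorted position 1, and so on
        int[] originalToSorted = invertPermutation(new int[] {2, 0, 1});
        System.out.println(java.util.Arrays.toString(originalToSorted));   // [1, 2, 0]
    }
}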

Aggregations

IntBigArray (io.trino.array.IntBigArray) 6
TrinoException (io.trino.spi.TrinoException) 3
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 1
Slice (io.airlift.slice.Slice) 1
BooleanStreamCheckpoint (io.trino.orc.checkpoint.BooleanStreamCheckpoint) 1
LongStreamCheckpoint (io.trino.orc.checkpoint.LongStreamCheckpoint) 1
ColumnEncoding (io.trino.orc.metadata.ColumnEncoding) 1
ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics) 1
Block (io.trino.spi.block.Block) 1
DictionaryBlock (io.trino.spi.block.DictionaryBlock) 1
AbstractIntIterator (it.unimi.dsi.fastutil.ints.AbstractIntIterator) 1