Use of io.trino.operator.aggregation.TypedSet in project trino by trinodb.
Class ArrayUnionFunction, method union.
/**
 * Builds the union of two arrays of the same element type.
 *
 * <p>Every position of {@code leftArray} and then every position of {@code rightArray} is
 * offered to a {@link TypedSet} that is created over the output block builder; the block
 * built from that builder is returned. Presumably the set appends each newly seen element
 * to the builder it was created with (this method never writes to the builder directly) --
 * confirm against {@code TypedSet}.
 *
 * @param type element type used to create the output block builder
 * @param elementEqual equality operator for two block positions of type {@code E}
 * @param elementHashCode hash code operator for a block position of type {@code E}
 * @param leftArray first input array
 * @param rightArray second input array
 * @return the block produced by the builder backing the set
 */
@TypeParameter("E")
@SqlType("array(E)")
public static Block union(
        @TypeParameter("E") Type type,
        @OperatorDependency(operator = EQUAL, argumentTypes = { "E", "E" }, convention = @Convention(arguments = { BLOCK_POSITION, BLOCK_POSITION }, result = NULLABLE_RETURN)) BlockPositionEqual elementEqual,
        @OperatorDependency(operator = HASH_CODE, argumentTypes = "E", convention = @Convention(arguments = BLOCK_POSITION, result = FAIL_ON_NULL)) BlockPositionHashCode elementHashCode,
        @SqlType("array(E)") Block leftArray,
        @SqlType("array(E)") Block rightArray)
{
    int leftSize = leftArray.getPositionCount();
    int rightSize = rightArray.getPositionCount();
    // Upper bound on the result size: no element is shared between or repeated within the inputs.
    int expectedSize = leftSize + rightSize;

    BlockBuilder unionBuilder = type.createBlockBuilder(null, expectedSize);
    TypedSet distinctElements = createEqualityTypedSet(type, elementEqual, elementHashCode, unionBuilder, expectedSize, "array_union");

    // Left elements are offered first, then right elements.
    for (int position = 0; position < leftSize; position++) {
        distinctElements.add(leftArray, position);
    }
    for (int position = 0; position < rightSize; position++) {
        distinctElements.add(rightArray, position);
    }

    return unionBuilder.build();
}
Use of io.trino.operator.aggregation.TypedSet in project trino by trinodb.
Class DynamicFilterSourceOperator, method addInput.
/**
 * Accepts one build-side page and folds it into the dynamic filter being collected.
 *
 * <p>Two modes are visible here:
 * <ul>
 * <li>{@code valueSets == null}: distinct values are no longer collected. If {@code minValues}
 * is still set, only min/max values are updated for each channel listed in
 * {@code minMaxChannels}, until {@code minMaxCollectionLimit} drops below zero.</li>
 * <li>otherwise: every position of every filter channel is added to that channel's
 * {@link TypedSet}; if the sets grow past the configured limits,
 * {@code handleTooLargePredicate()} is invoked.</li>
 * </ul>
 *
 * @param page the input page; it is also stored in {@code current}
 */
@Override
public void addInput(Page page)
{
    verify(!finished, "DynamicFilterSourceOperator: addInput() may not be called after finish()");
    current = page;

    if (valueSets == null) {
        // Distinct-value collection has been given up on (the predicate became too large).
        if (minValues == null) {
            // Too many rows were seen to keep collecting a min/max range either.
            return;
        }

        minMaxCollectionLimit -= page.getPositionCount();
        if (minMaxCollectionLimit < 0) {
            handleMinMaxCollectionLimitExceeded();
            return;
        }

        // Record only min and max values for each orderable channel.
        for (int i = 0; i < minMaxChannels.size(); i++) {
            Integer orderableChannel = minMaxChannels.get(i);
            BlockPositionComparison channelComparison = minMaxComparisons.get(i);
            Block channelBlock = page.getBlock(channels.get(orderableChannel).index);
            updateMinMaxValues(channelBlock, orderableChannel, channelComparison);
        }
        return;
    }

    minMaxCollectionLimit -= page.getPositionCount();

    // TODO: we should account for the memory used for collecting build-side values using MemoryContext
    long totalRetainedBytes = 0;
    int largestDistinctCount = 0;

    // Collect only the columns which are relevant for the JOIN.
    for (int channelIndex = 0; channelIndex < channels.size(); ++channelIndex) {
        Block channelBlock = page.getBlock(channels.get(channelIndex).index);
        TypedSet channelValues = valueSets[channelIndex];

        int positionCount = channelBlock.getPositionCount();
        for (int position = 0; position < positionCount; ++position) {
            channelValues.add(channelBlock, position);
        }

        totalRetainedBytes += channelValues.getRetainedSizeInBytes();
        largestDistinctCount = Math.max(largestDistinctCount, channelValues.size());
    }

    // Either a single column holds too many distinct values, or the whole filter
    // (summed over all columns) exceeds maxFilterSizeInBytes.
    boolean tooManyDistinctValues = largestDistinctCount > maxDistinctValues;
    boolean filterTooLarge = totalRetainedBytes > maxFilterSizeInBytes;
    if (tooManyDistinctValues || filterTooLarge) {
        handleTooLargePredicate();
    }
}
Use of io.trino.operator.aggregation.TypedSet in project trino by trinodb.
Class MultimapAggregationFunction, method output.
/**
 * Writes the aggregated multimap held in {@code state} to {@code out}.
 *
 * <p>An empty state produces a null. Otherwise the entries of the state are grouped by key:
 * each distinct key is written once, in the order it is first visited by
 * {@code state.forEach}, followed by an array holding all values visited for that key.
 *
 * <p>The value builders are indexed by {@code keySet.positionOf(...)} while filling and by
 * the ordinal of the distinct key while writing, so this code depends on {@code TypedSet}
 * assigning positions in insertion order (0, 1, 2, ...) -- confirm against {@code TypedSet}.
 *
 * @param keyType type of the multimap keys
 * @param keyEqual equality operator for key block positions
 * @param keyHashCode hash code operator for key block positions
 * @param valueType type of the multimap values
 * @param state aggregation state holding the collected (key, value) entries
 * @param out builder that receives either a null or one multimap entry
 */
public static void output(Type keyType, BlockPositionEqual keyEqual, BlockPositionHashCode keyHashCode, Type valueType, MultimapAggregationState state, BlockBuilder out)
{
    if (state.isEmpty()) {
        out.appendNull();
        return;
    }

    // Read the entry count once; it sizes every structure allocated below.
    int entryCount = state.getEntryCount();

    // TODO: Avoid copy value block associated with the same key by using strategy similar to multimap_from_entries
    ObjectBigArray<BlockBuilder> valueArrayBlockBuilders = new ObjectBigArray<>();
    valueArrayBlockBuilders.ensureCapacity(entryCount);
    BlockBuilder distinctKeyBlockBuilder = keyType.createBlockBuilder(null, entryCount, expectedValueSize(keyType, 100));
    TypedSet keySet = createEqualityTypedSet(keyType, keyEqual, keyHashCode, entryCount, NAME);

    state.forEach((key, value, keyValueIndex) -> {
        // Merge values of the same key into an array
        boolean firstOccurrence = keySet.add(key, keyValueIndex);
        // Look the key's slot up once per entry instead of once for set() and again for get().
        int keyPosition = keySet.positionOf(key, keyValueIndex);
        if (firstOccurrence) {
            keyType.appendTo(key, keyValueIndex, distinctKeyBlockBuilder);
            BlockBuilder valueArrayBuilder = valueType.createBlockBuilder(null, 10, expectedValueSize(valueType, EXPECTED_ENTRY_SIZE));
            valueArrayBlockBuilders.set(keyPosition, valueArrayBuilder);
        }
        valueType.appendTo(value, keyValueIndex, valueArrayBlockBuilders.get(keyPosition));
    });

    // Write keys and value arrays into one Block
    Type valueArrayType = new ArrayType(valueType);
    BlockBuilder multimapBlockBuilder = out.beginBlockEntry();
    int distinctKeyCount = distinctKeyBlockBuilder.getPositionCount();
    for (int i = 0; i < distinctKeyCount; i++) {
        keyType.appendTo(distinctKeyBlockBuilder, i, multimapBlockBuilder);
        valueArrayType.writeObject(multimapBlockBuilder, valueArrayBlockBuilders.get(i).build());
    }
    out.closeEntry();
}
Use of io.trino.operator.aggregation.TypedSet in project trino by trinodb.
Class BenchmarkArrayDistinct, method oldArrayDistinct.
/**
 * Returns the distinct elements of a varchar array, keeping the first occurrence of each.
 *
 * <p>An empty input is returned as-is. The separate {@code contains} and {@code add} calls
 * are kept on purpose: judging by the name, this is the legacy implementation used as a
 * benchmark baseline -- do not "optimize" it without checking the benchmark's intent.
 *
 * @param array input array of varchar elements
 * @return the input itself when empty, otherwise a new block with duplicates removed
 */
@ScalarFunction
@SqlType("array(varchar)")
public static Block oldArrayDistinct(@SqlType("array(varchar)") Block array)
{
    int elementCount = array.getPositionCount();
    if (elementCount == 0) {
        return array;
    }

    TypedSet seenElements = createEqualityTypedSet(VARCHAR, EQUAL_OPERATOR, HASH_CODE_OPERATOR, elementCount, "old_array_distinct");
    BlockBuilder distinctBuilder = VARCHAR.createBlockBuilder(null, elementCount);

    for (int position = 0; position < elementCount; position++) {
        if (seenElements.contains(array, position)) {
            // Already emitted earlier in the array.
            continue;
        }
        seenElements.add(array, position);
        VARCHAR.appendTo(array, position, distinctBuilder);
    }

    return distinctBuilder.build();
}
Use of io.trino.operator.aggregation.TypedSet in project trino by trinodb.
Class MapFromEntriesFunction, method mapFromEntries.
/**
 * Builds a map from an array of (key, value) rows.
 *
 * <p>Each row's key and value are appended to a map entry opened on the shared
 * {@code pageBuilder}. The open entry is closed and a position is declared before every
 * exit from this method, including the failure paths.
 *
 * @param keyEqual equality operator for key block positions
 * @param keyHashCode hash code operator for key block positions
 * @param mapType the resulting map type; supplies the key and value types
 * @param session session used to render the offending key in the duplicate-key message
 * @param mapEntries array of rows, each holding a key at field 0 and a value at field 1
 * @return the map object read back from the last position of the map block builder
 * @throws TrinoException with {@code INVALID_FUNCTION_ARGUMENT} if an entry is null,
 *         a key is null, or a key occurs more than once
 */
@TypeParameter("K")
@TypeParameter("V")
@SqlType("map(K,V)")
@SqlNullable
public Block mapFromEntries(
        @OperatorDependency(operator = EQUAL, argumentTypes = { "K", "K" }, convention = @Convention(arguments = { BLOCK_POSITION, BLOCK_POSITION }, result = NULLABLE_RETURN)) BlockPositionEqual keyEqual,
        @OperatorDependency(operator = HASH_CODE, argumentTypes = "K", convention = @Convention(arguments = BLOCK_POSITION, result = FAIL_ON_NULL)) BlockPositionHashCode keyHashCode,
        @TypeParameter("map(K,V)") MapType mapType,
        ConnectorSession session,
        @SqlType("array(row(K,V))") Block mapEntries)
{
    Type keyType = mapType.getKeyType();
    Type valueType = mapType.getValueType();
    RowType entryRowType = RowType.anonymous(ImmutableList.of(keyType, valueType));

    if (pageBuilder.isFull()) {
        pageBuilder.reset();
    }

    int entryCount = mapEntries.getPositionCount();
    BlockBuilder mapBlockBuilder = pageBuilder.getBlockBuilder(0);
    BlockBuilder entryWriter = mapBlockBuilder.beginBlockEntry();
    TypedSet seenKeys = createEqualityTypedSet(keyType, keyEqual, keyHashCode, entryCount, "map_from_entries");

    for (int position = 0; position < entryCount; position++) {
        if (mapEntries.isNull(position)) {
            finishMapEntry(mapBlockBuilder);
            throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "map entry cannot be null");
        }

        // Field 0 of the row is the key, field 1 is the value.
        Block entryRow = entryRowType.getObject(mapEntries, position);

        if (entryRow.isNull(0)) {
            finishMapEntry(mapBlockBuilder);
            throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "map key cannot be null");
        }

        boolean keyIsNew = seenKeys.add(entryRow, 0);
        if (!keyIsNew) {
            finishMapEntry(mapBlockBuilder);
            throw new TrinoException(INVALID_FUNCTION_ARGUMENT, format("Duplicate keys (%s) are not allowed", keyType.getObjectValue(session, entryRow, 0)));
        }

        keyType.appendTo(entryRow, 0, entryWriter);
        valueType.appendTo(entryRow, 1, entryWriter);
    }

    finishMapEntry(mapBlockBuilder);
    return mapType.getObject(mapBlockBuilder, mapBlockBuilder.getPositionCount() - 1);
}

// Closes the map entry currently open on the given builder and declares one position on the page builder.
private void finishMapEntry(BlockBuilder mapBlockBuilder)
{
    mapBlockBuilder.closeEntry();
    pageBuilder.declarePosition();
}
Aggregations