Search in sources :

Example 1 with ValueSet

use of io.prestosql.spi.predicate.ValueSet in project hetu-core by openlookeng.

In class TestBloomIndex, method testDomainMatching:

/**
 * Checks that a string {@code BloomIndex} answers single-value domain lookups
 * after it has been serialized to a file and deserialized from it again.
 *
 * @throws IOException if the temporary file cannot be created, written, or read
 */
@Test
public void testDomainMatching() throws IOException {
    try (TempFolder tempFolder = new TempFolder();
        BloomIndex bloomIndex = new BloomIndex()) {
        tempFolder.create();
        File indexFile = tempFolder.newFile();

        // Index a mix of plain ASCII, non-ASCII, control-character and symbol-only strings.
        List<Object> indexedValues = ImmutableList.of("a", "ab", "测试", "\n", "%#!", ":dfs");
        bloomIndex.setExpectedNumOfEntries(indexedValues.size());
        bloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", indexedValues)));

        // Round-trip the index through the file before querying it.
        try (FileOutputStream indexOut = new FileOutputStream(indexFile)) {
            bloomIndex.serialize(indexOut);
        }
        try (FileInputStream indexIn = new FileInputStream(indexFile)) {
            bloomIndex.deserialize(indexIn);
        }

        // A mocked single-value VARCHAR value set; only the returned value changes per check.
        ValueSet lookup = mock(ValueSet.class);
        when(lookup.isSingleValue()).thenReturn(true);
        when(lookup.getType()).thenReturn(VARCHAR);

        // Values that were indexed must match.
        when(lookup.getSingleValue()).thenReturn("a");
        assertTrue(bloomIndex.matches(Domain.create(lookup, false)));

        when(lookup.getSingleValue()).thenReturn("%#!");
        assertTrue(bloomIndex.matches(Domain.create(lookup, false)));

        // A value that was never indexed is expected not to match.
        when(lookup.getSingleValue()).thenReturn("bb");
        assertFalse(bloomIndex.matches(Domain.create(lookup, false)));
    }
}
Also used : TempFolder(io.hetu.core.common.filesystem.TempFolder) FileOutputStream(java.io.FileOutputStream) File(java.io.File) ValueSet(io.prestosql.spi.predicate.ValueSet) FileInputStream(java.io.FileInputStream) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 2 with ValueSet

use of io.prestosql.spi.predicate.ValueSet in project hetu-core by openlookeng.

In class HivePartitionManager, method toCompactTupleDomain:

/**
 * Re-keys the predicate's column domains by {@code HiveColumnHandle}, replacing any
 * value set whose size exceeds {@code threshold} with a coarser one.
 *
 * <ul>
 *   <li>Range-based sets with more than {@code threshold} ranges collapse to their span.</li>
 *   <li>Discrete sets with more than {@code threshold} values become "all values" of the type.</li>
 *   <li>All-or-none sets, and sets within the threshold, are kept as they are.</li>
 * </ul>
 *
 * @param effectivePredicate predicate whose column handles are cast to {@code HiveColumnHandle}
 * @param threshold maximum number of ranges or discrete values kept before compaction
 * @return a tuple domain over the same columns with compacted value sets; if the predicate
 *         exposes no domains, the result is built from an empty map
 */
private static TupleDomain<HiveColumnHandle> toCompactTupleDomain(TupleDomain<ColumnHandle> effectivePredicate, int threshold) {
    ImmutableMap.Builder<HiveColumnHandle, Domain> compacted = ImmutableMap.builder();
    Optional<Map<ColumnHandle, Domain>> columnDomains = effectivePredicate.getDomains();
    if (columnDomains.isPresent()) {
        for (Map.Entry<ColumnHandle, Domain> columnDomain : columnDomains.get().entrySet()) {
            HiveColumnHandle column = (HiveColumnHandle) columnDomain.getKey();
            Domain domain = columnDomain.getValue();
            ValueSet original = domain.getValues();

            // Each callback yields a replacement only when the set is over the threshold.
            Optional<ValueSet> coarser = original.getValuesProcessor().<Optional<ValueSet>>transform(
                    ranges -> {
                        if (ranges.getRangeCount() > threshold) {
                            return Optional.of(ValueSet.ofRanges(ranges.getSpan()));
                        }
                        return Optional.empty();
                    },
                    discreteValues -> {
                        if (discreteValues.getValues().size() > threshold) {
                            return Optional.of(ValueSet.all(original.getType()));
                        }
                        return Optional.empty();
                    },
                    allOrNone -> Optional.empty());

            ValueSet effectiveValues = coarser.orElse(original);
            compacted.put(column, Domain.create(effectiveValues, domain.isNullAllowed()));
        }
    }
    return TupleDomain.withColumnDomains(compacted.build());
}
Also used : ValueSet(io.prestosql.spi.predicate.ValueSet) DecimalType(io.prestosql.spi.type.DecimalType) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) NullableValue(io.prestosql.spi.predicate.NullableValue) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Predicates.not(com.google.common.base.Predicates.not) Map(java.util.Map) TupleDomain.all(io.prestosql.spi.predicate.TupleDomain.all) Type(io.prestosql.spi.type.Type) Constraint(io.prestosql.spi.connector.Constraint) ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap(com.google.common.collect.ImmutableMap) RealType(io.prestosql.spi.type.RealType) Predicate(java.util.function.Predicate) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Chars.padSpaces(io.prestosql.spi.type.Chars.padSpaces) String.format(java.lang.String.format) TimestampType(io.prestosql.spi.type.TimestampType) List(java.util.List) Table(io.prestosql.plugin.hive.metastore.Table) Domain(io.prestosql.spi.predicate.Domain) FileUtils(org.apache.hadoop.hive.common.FileUtils) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) HiveUtil.parsePartitionValue(io.prestosql.plugin.hive.HiveUtil.parsePartitionValue) BigintType(io.prestosql.spi.type.BigintType) Slice(io.airlift.slice.Slice) CharType(io.prestosql.spi.type.CharType) Decimals(io.prestosql.spi.type.Decimals) TupleDomain.none(io.prestosql.spi.predicate.TupleDomain.none) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) DoubleType(io.prestosql.spi.type.DoubleType) Objects.requireNonNull(java.util.Objects.requireNonNull) 
Predicates(com.google.common.base.Predicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) SmallintType(io.prestosql.spi.type.SmallintType) VerifyException(com.google.common.base.VerifyException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Iterator(java.util.Iterator) IntegerType(io.prestosql.spi.type.IntegerType) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) TypeManager(io.prestosql.spi.type.TypeManager) TinyintType(io.prestosql.spi.type.TinyintType) Maps(com.google.common.collect.Maps) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DateType(io.prestosql.spi.type.DateType) BooleanType(io.prestosql.spi.type.BooleanType) VarcharType(io.prestosql.spi.type.VarcharType) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Domain(io.prestosql.spi.predicate.Domain) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ValueSet(io.prestosql.spi.predicate.ValueSet) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 3 with ValueSet

use of io.prestosql.spi.predicate.ValueSet in project hetu-core by openlookeng.

In class HiveBucketing, method getHiveBucketFilter:

/**
 * Works out which buckets of {@code table} have to be read for the given predicate.
 *
 * <p>If the predicate's fixed values resolve to one bucket via {@code getHiveBucket},
 * the filter contains just that bucket. Otherwise the domain on the column named
 * {@code BUCKET_COLUMN_NAME} is consulted and every bucket number it contains is kept.
 *
 * @param table table whose bucketing is inspected
 * @param effectivePredicate predicate over {@code HiveColumnHandle} columns
 * @return the bucket filter, or empty when the table has no bucket handle, is bucketed on
 *         a timestamp, the predicate yields no fixed values or no domains, or there is no
 *         domain for the bucket column
 */
public static Optional<HiveBucketFilter> getHiveBucketFilter(Table table, TupleDomain<ColumnHandle> effectivePredicate) {
    // The bucket property is only dereferenced once a bucket handle is known to exist.
    if (!getHiveBucketHandle(table).isPresent()
            || bucketedOnTimestamp(table.getStorage().getBucketProperty().get(), table)) {
        return Optional.empty();
    }

    Optional<Map<ColumnHandle, NullableValue>> fixedValues = TupleDomain.extractFixedValues(effectivePredicate);
    if (!fixedValues.isPresent()) {
        return Optional.empty();
    }

    // Fast path: the fixed values identify exactly one bucket.
    OptionalInt resolvedBucket = getHiveBucket(table, fixedValues.get());
    if (resolvedBucket.isPresent()) {
        return Optional.of(new HiveBucketFilter(ImmutableSet.of(resolvedBucket.getAsInt())));
    }

    Optional<Map<ColumnHandle, Domain>> columnDomains = effectivePredicate.getDomains();
    if (!columnDomains.isPresent()) {
        return Optional.empty();
    }

    // Find the first domain attached to the bucket column, if any.
    Domain bucketColumnDomain = null;
    for (Entry<ColumnHandle, Domain> candidate : columnDomains.get().entrySet()) {
        if (((HiveColumnHandle) candidate.getKey()).getName().equals(BUCKET_COLUMN_NAME)) {
            bucketColumnDomain = candidate.getValue();
            break;
        }
    }
    if (bucketColumnDomain == null) {
        return Optional.empty();
    }

    // Keep every bucket number that the bucket-column domain contains.
    ValueSet allowedBuckets = bucketColumnDomain.getValues();
    int totalBuckets = table.getStorage().getBucketProperty().get().getBucketCount();
    ImmutableSet.Builder<Integer> selectedBuckets = ImmutableSet.builder();
    for (int bucketNumber = 0; bucketNumber < totalBuckets; bucketNumber++) {
        if (allowedBuckets.containsValue((long) bucketNumber)) {
            selectedBuckets.add(bucketNumber);
        }
    }
    return Optional.of(new HiveBucketFilter(selectedBuckets.build()));
}
Also used : OptionalInt(java.util.OptionalInt) Entry(java.util.Map.Entry) ImmutableSet(com.google.common.collect.ImmutableSet) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Domain(io.prestosql.spi.predicate.Domain) HashMap(java.util.HashMap) Map(java.util.Map) ValueSet(io.prestosql.spi.predicate.ValueSet)

Example 4 with ValueSet

use of io.prestosql.spi.predicate.ValueSet in project hetu-core by openlookeng.

In class TupleDomainFilterUtils, method toFilter:

/**
 * Converts a {@code Domain} backed by a {@code SortedRangeSet} into a {@code TupleDomainFilter}.
 *
 * @param domain domain to convert; its value set must be a {@code SortedRangeSet}
 * @return {@code IS_NULL} when there are no ranges and nulls are allowed; a boolean filter
 *         for BOOLEAN domains; a single range filter for one non-boolean range; otherwise
 *         a filter combining the individual range filters
 * @throws IllegalArgumentException if the domain's value set is not a {@code SortedRangeSet}
 */
public static TupleDomainFilter toFilter(Domain domain) {
    ValueSet valueSet = domain.getValues();
    checkArgument(valueSet instanceof SortedRangeSet, "Unexpected domain type: " + valueSet.getClass().getSimpleName());

    List<Range> orderedRanges = ((SortedRangeSet) valueSet).getOrderedRanges();
    boolean nullAllowed = domain.isNullAllowed();
    if (nullAllowed && orderedRanges.isEmpty()) {
        return IS_NULL;
    }

    Type type = domain.getType();
    // BOOLEAN always goes through the boolean filter, whatever the number of ranges.
    if (type == BOOLEAN) {
        return createBooleanFilter(orderedRanges, nullAllowed);
    }
    if (orderedRanges.size() == 1) {
        return createRangeFilter(type, orderedRanges.get(0), nullAllowed);
    }

    // Build one filter per range and drop those that can never match.
    List<TupleDomainFilter> perRangeFilters = orderedRanges.stream()
            .map(range -> createRangeFilter(type, range, false))
            .filter(candidate -> !ALWAYS_FALSE.equals(candidate))
            .collect(toImmutableList());
    if (perRangeFilters.isEmpty()) {
        if (nullAllowed) {
            return IS_NULL;
        }
        return ALWAYS_FALSE;
    }

    if (!(perRangeFilters.get(0) instanceof TupleDomainFilter.BigintRange)) {
        return getMultiValuesTDF(perRangeFilters, nullAllowed);
    }

    List<TupleDomainFilter.BigintRange> bigintRanges = perRangeFilters.stream()
            .map(candidate -> (TupleDomainFilter.BigintRange) candidate)
            .collect(toImmutableList());
    boolean everyRangeIsSingleValue = bigintRanges.stream()
            .allMatch(TupleDomainFilter.BigintRange::isSingleValue);
    if (everyRangeIsSingleValue) {
        // Every range is a single value, so build a value-list filter from the lower bounds.
        long[] singleValues = bigintRanges.stream()
                .mapToLong(TupleDomainFilter.BigintRange::getLower)
                .toArray();
        return TupleDomainFilter.BigintValues.of(singleValues, nullAllowed);
    }
    // At least one range is not a single value, so build a multi-range filter over all of them.
    return TupleDomainFilter.BigintMultiRange.of(bigintRanges, nullAllowed);
}
Also used : Varchars.isVarcharType(io.prestosql.spi.type.Varchars.isVarcharType) CharType(io.prestosql.spi.type.CharType) DecimalType(io.prestosql.spi.type.DecimalType) Type(io.prestosql.spi.type.Type) VarbinaryType.isVarbinaryType(io.prestosql.spi.type.VarbinaryType.isVarbinaryType) SortedRangeSet(io.prestosql.spi.predicate.SortedRangeSet) Range(io.prestosql.spi.predicate.Range) ValueSet(io.prestosql.spi.predicate.ValueSet)

Aggregations

ValueSet (io.prestosql.spi.predicate.ValueSet)4 Domain (io.prestosql.spi.predicate.Domain)2 TupleDomain (io.prestosql.spi.predicate.TupleDomain)2 CharType (io.prestosql.spi.type.CharType)2 DecimalType (io.prestosql.spi.type.DecimalType)2 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Predicates (com.google.common.base.Predicates)1 Predicates.not (com.google.common.base.Predicates.not)1 VerifyException (com.google.common.base.VerifyException)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Maps (com.google.common.collect.Maps)1 Slice (io.airlift.slice.Slice)1 TempFolder (io.hetu.core.common.filesystem.TempFolder)1 HiveUtil.parsePartitionValue (io.prestosql.plugin.hive.HiveUtil.parsePartitionValue)1 HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity)1 MetastoreUtil (io.prestosql.plugin.hive.metastore.MetastoreUtil)1 SemiTransactionalHiveMetastore (io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore)1