Example 41 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

The class TestParquetFilterPredicate, method testFilterBetween.

@Test
public void testFilterBetween() {
    MessageType schema = MessageTypeParser.parseMessageType("message test {  required int32 bCol; }");
    SearchArgument sarg = SearchArgumentFactory.newBuilder().between("bCol", PredicateLeaf.Type.LONG, 1L, 5L).build();
    Map<String, TypeInfo> columnTypes = new HashMap<>();
    columnTypes.put("bCol", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    String expected = "and(lteq(bCol, 5), not(lt(bCol, 1)))";
    assertEquals(expected, p.toString());
    sarg = SearchArgumentFactory.newBuilder().between("bCol", PredicateLeaf.Type.LONG, 5L, 1L).build();
    p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    expected = "and(lteq(bCol, 1), not(lt(bCol, 5)))";
    assertEquals(expected, p.toString());
    sarg = SearchArgumentFactory.newBuilder().between("bCol", PredicateLeaf.Type.LONG, 1L, 1L).build();
    p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    expected = "and(lteq(bCol, 1), not(lt(bCol, 1)))";
    assertEquals(expected, p.toString());
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)
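
The converter lowers between(lo, hi) to and(lteq(col, hi), not(lt(col, lo))), which is why the swapped-bounds case above yields an unsatisfiable predicate rather than an error. As a minimal sketch, the same shape can be built directly with Parquet's public FilterApi; the class name here is illustrative, not part of the test.

import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;

public class BetweenShapeSketch {
    public static void main(String[] args) {
        // between(1, 5) on an int32 column: value <= 5 AND NOT(value < 1)
        IntColumn bCol = FilterApi.intColumn("bCol");
        FilterPredicate between = FilterApi.and(
                FilterApi.ltEq(bCol, 5),
                FilterApi.not(FilterApi.lt(bCol, 1)));
        // Prints: and(lteq(bCol, 5), not(lt(bCol, 1)))
        System.out.println(between);
    }
}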

Example 42 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

The class TestParquetFilterPredicate, method testFilterFloatColumn.

@Test
public void testFilterFloatColumn() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("x", PredicateLeaf.Type.LONG, 22L)
        .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
        .lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString())
        .equals("z", PredicateLeaf.Type.FLOAT, Double.valueOf(0.22))
        .equals("z1", PredicateLeaf.Type.FLOAT, Double.valueOf(0.22))
        .end()
        .build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required int32 x; required int32 x1;" + " required binary y; required float z; required float z1;}");
    Map<String, TypeInfo> columnTypes = new HashMap<>();
    columnTypes.put("x", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    columnTypes.put("x1", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    columnTypes.put("y", TypeInfoFactory.getCharTypeInfo(10));
    columnTypes.put("z", TypeInfoFactory.getPrimitiveTypeInfo("float"));
    columnTypes.put("z1", TypeInfoFactory.getPrimitiveTypeInfo("float"));
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," + " lteq(y, Binary{\"hi\"})), eq(z, " + "0.22)), eq(z1, 0.22))";
    assertEquals(expected, p.toString());
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)
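
Note how the expected string prints eq(z, 0.22) although the SARG literal is Double.valueOf(0.22): a predicate on a Parquet float column carries a Float, so the converter narrows the Double. A small sketch of that leaf with FilterApi (class name is illustrative):

import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.FloatColumn;

public class FloatLeafSketch {
    public static void main(String[] args) {
        // The Double-valued SARG literal 0.22 ends up as the float 0.22f here.
        FloatColumn z = FilterApi.floatColumn("z");
        FilterPredicate eq = FilterApi.eq(z, 0.22f);
        // Prints: eq(z, 0.22)
        System.out.println(eq);
    }
}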

Example 43 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

The class TestParquetFilterPredicate, method testFilterVarCharColumn.

@Test
public void testFilterVarCharColumn() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("a", PredicateLeaf.Type.STRING, new HiveVarchar("apple", 10).toString())
        .lessThanEquals("b", PredicateLeaf.Type.STRING, new HiveVarchar("pear", 10).toString())
        .equals("c", PredicateLeaf.Type.STRING, new HiveVarchar("orange", 10).toString())
        .nullSafeEquals("d", PredicateLeaf.Type.STRING, new HiveVarchar("pineapple", 9).toString())
        .in("e", PredicateLeaf.Type.STRING, new HiveVarchar("cherry", 10).toString(), new HiveVarchar("orange", 10).toString())
        .between("f", PredicateLeaf.Type.STRING, new HiveVarchar("apple", 10).toString(), new HiveVarchar("pear", 10).toString())
        .isNull("g", PredicateLeaf.Type.STRING)
        .end()
        .build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required binary a; required binary b;" + " required binary c; required binary d;" + " required binary e; required binary f;" + " required binary g;}");
    Map<String, TypeInfo> columnTypes = new HashMap<>();
    columnTypes.put("a", TypeInfoFactory.getVarcharTypeInfo(10));
    columnTypes.put("b", TypeInfoFactory.getVarcharTypeInfo(10));
    columnTypes.put("c", TypeInfoFactory.getVarcharTypeInfo(10));
    columnTypes.put("d", TypeInfoFactory.getVarcharTypeInfo(10));
    columnTypes.put("e", TypeInfoFactory.getVarcharTypeInfo(10));
    columnTypes.put("f", TypeInfoFactory.getVarcharTypeInfo(10));
    columnTypes.put("g", TypeInfoFactory.getVarcharTypeInfo(10));
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    String expected = "and(and(and(and(and(and(" + "lt(a, Binary{\"apple\"}), " + "lteq(b, Binary{\"pear\"})), " + "eq(c, Binary{\"orange\"})), " + "eq(d, Binary{\"pineapple\"})), " + "or(eq(e, Binary{\"cherry\"}), eq(e, Binary{\"orange\"}))), " + "and(lteq(f, Binary{\"pear\"}), not(lt(f, Binary{\"apple\"})))), " + "eq(g, null))";
    assertEquals(expected, p.toString());
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)
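
Three conversions in this example are worth calling out: char/varchar literals are pushed down as Parquet Binary values, in(...) expands to a disjunction of equality tests, and isNull(...) surfaces as an equality test against null. A minimal sketch of those shapes with FilterApi (class and variable names are illustrative):

import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.BinaryColumn;
import org.apache.parquet.io.api.Binary;

public class VarcharLeafSketch {
    public static void main(String[] args) {
        // String-typed leaves compare against Binary values.
        BinaryColumn e = FilterApi.binaryColumn("e");
        FilterPredicate in = FilterApi.or(
                FilterApi.eq(e, Binary.fromString("cherry")),
                FilterApi.eq(e, Binary.fromString("orange")));
        // isNull(g) surfaces as eq(g, null).
        BinaryColumn g = FilterApi.binaryColumn("g");
        FilterPredicate isNull = FilterApi.eq(g, (Binary) null);
        // Prints: and(or(eq(e, Binary{"cherry"}), eq(e, Binary{"orange"})), eq(g, null))
        System.out.println(FilterApi.and(in, isNull));
    }
}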

Example 44 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

The class TestParquetFilterPredicate, method testFilterFloatColumns.

@Test
public void testFilterFloatColumns() {
    MessageType schema = MessageTypeParser.parseMessageType("message test {  required float a; required int32 b; }");
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startNot()
        .startOr()
        .isNull("a", PredicateLeaf.Type.FLOAT)
        .between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3)
        .in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
        .end()
        .end()
        .build();
    Map<String, TypeInfo> columnTypes = new HashMap<>();
    columnTypes.put("a", TypeInfoFactory.getPrimitiveTypeInfo("float"));
    columnTypes.put("b", TypeInfoFactory.getPrimitiveTypeInfo("int"));
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema, columnTypes);
    String expected = "and(and(not(eq(a, null)), not(and(lteq(a, 20.3), not(lt(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
    assertEquals(expected, p.toString());
}
Also used : HashMap(java.util.HashMap) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)
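
The expected string shows the De Morgan rewrite at work: not(or(p, q, r)) is pushed down as a left-nested and of the three negated branches, while each branch keeps its own inner structure. A sketch of the pushed-down shape built directly with FilterApi (class name is illustrative):

import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.FloatColumn;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;

public class DeMorganSketch {
    public static void main(String[] args) {
        FloatColumn a = FilterApi.floatColumn("a");
        IntColumn b = FilterApi.intColumn("b");
        FilterPredicate isNullA = FilterApi.eq(a, (Float) null);
        FilterPredicate betweenA = FilterApi.and(
                FilterApi.ltEq(a, 20.3f),
                FilterApi.not(FilterApi.lt(a, 10.2f)));
        FilterPredicate inB = FilterApi.or(
                FilterApi.or(FilterApi.eq(b, 1), FilterApi.eq(b, 2)),
                FilterApi.eq(b, 3));
        // NOT(p OR q OR r) == AND(NOT p, NOT q, NOT r) by De Morgan.
        FilterPredicate pushed = FilterApi.and(
                FilterApi.and(FilterApi.not(isNullA), FilterApi.not(betweenA)),
                FilterApi.not(inB));
        // Prints the same string the test asserts above.
        System.out.println(pushed);
    }
}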

Example 45 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project presto by prestodb.

The class TupleDomainParquetPredicate, method getDomain.

/**
 * Get a domain for the ranges defined by each pair of elements from {@code minimums} and {@code maximums}.
 * Both arrays must have the same length.
 */
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue) {
    checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
    List<Range> ranges = new ArrayList<>();
    if (type.equals(BOOLEAN)) {
        boolean hasTrueValues = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
        boolean hasFalseValues = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
        if (hasTrueValues && hasFalseValues) {
            return Domain.all(type);
        }
        if (hasTrueValues) {
            return Domain.create(ValueSet.of(type, true), hasNullValue);
        }
        if (hasFalseValues) {
            return Domain.create(ValueSet.of(type, false), hasNullValue);
        }
        // All nulls case is handled earlier
        throw new VerifyException("Impossible boolean statistics");
    }
    if ((type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER))) {
        for (int i = 0; i < minimums.size(); i++) {
            long min = asLong(minimums.get(i));
            long max = asLong(maximums.get(i));
            if (isStatisticsOverflow(type, min, max)) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(REAL)) {
        for (int i = 0; i < minimums.size(); i++) {
            Float min = (Float) minimums.get(i);
            Float max = (Float) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
        }
        checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(DOUBLE)) {
        for (int i = 0; i < minimums.size(); i++) {
            Double min = (Double) minimums.get(i);
            Double max = (Double) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (isVarcharType(type)) {
        for (int i = 0; i < minimums.size(); i++) {
            Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
            Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
            ranges.add(Range.range(type, min, true, max, true));
        }
        checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(DATE)) {
        for (int i = 0; i < minimums.size(); i++) {
            long min = asLong(minimums.get(i));
            long max = asLong(maximums.get(i));
            if (isStatisticsOverflow(type, min, max)) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    return Domain.create(ValueSet.all(type), hasNullValue);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) PredicateUtils.isStatisticsOverflow(com.facebook.presto.parquet.predicate.PredicateUtils.isStatisticsOverflow) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) FilterApi(org.apache.parquet.filter2.predicate.FilterApi) ByteBuffer(java.nio.ByteBuffer) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Slices(io.airlift.slice.Slices) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) UserDefinedPredicate(org.apache.parquet.filter2.predicate.UserDefinedPredicate) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) BINARY(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) String.format(java.lang.String.format) Range(com.facebook.presto.common.predicate.Range) Binary(org.apache.parquet.io.api.Binary) Serializable(java.io.Serializable) INT64(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64) List(java.util.List) LITTLE_ENDIAN(java.nio.ByteOrder.LITTLE_ENDIAN) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) Optional(java.util.Optional) DictionaryPage(com.facebook.presto.parquet.DictionaryPage) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) Slice(io.airlift.slice.Slice) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) HashMap(java.util.HashMap) INT32(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32) Function(java.util.function.Function) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) Statistics(org.apache.parquet.column.statistics.Statistics) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Dictionary(com.facebook.presto.parquet.dictionary.Dictionary) FLOAT(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ValueSet(com.facebook.presto.common.predicate.ValueSet) VerifyException(com.google.common.base.VerifyException) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) Range(com.facebook.presto.common.predicate.Range)
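
The REAL branch is the subtle one: Presto carries REAL values as the raw int bits of the float widened to long, so the range endpoints are encoded with floatToRawIntBits, and a NaN in either statistic makes the min/max useless. Below is a standalone sketch of just that branch, reusing only calls that appear in the method above; the class name and min/max literals are hypothetical.

import static com.facebook.presto.common.type.RealType.REAL;
import static java.lang.Float.floatToRawIntBits;

import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.Range;
import com.facebook.presto.common.predicate.ValueSet;

public class RealDomainSketch {
    public static void main(String[] args) {
        // Hypothetical column statistics for a single row group.
        float min = 1.5f;
        float max = 7.25f;
        boolean hasNullValue = true;
        Domain domain;
        if (Float.isNaN(min) || Float.isNaN(max)) {
            // NaN does not order, so the statistics constrain nothing.
            domain = Domain.create(ValueSet.all(REAL), hasNullValue);
        } else {
            // Endpoints travel as the float's raw int bits, widened to long.
            domain = Domain.create(
                    ValueSet.ofRanges(Range.range(REAL,
                            (long) floatToRawIntBits(min), true,
                            (long) floatToRawIntBits(max), true)),
                    hasNullValue);
        }
        System.out.println(domain);
    }
}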

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)45 Test (org.junit.Test)31 HashMap (java.util.HashMap)22 MessageType (org.apache.parquet.schema.MessageType)22 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)20 SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)15 List (java.util.List)6 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)6 ArrayList (java.util.ArrayList)5 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)4 ValueInspector (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector)4 Path (org.apache.hadoop.fs.Path)3 BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn)2 And (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And)2 Or (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or)2 Test (org.junit.jupiter.api.Test)2 Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair)2 ViewElementDefinition (uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition)2 GetElements (uk.gov.gchq.gaffer.operation.impl.get.GetElements)2 ParquetStore (uk.gov.gchq.gaffer.parquetstore.ParquetStore)2