Search in sources :

Example 51 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

The method get of the class FilterCompat.

/**
 * Wraps a {@code FilterPredicate} in a {@code Filter}.
 *
 * <p>The supplied predicate is logged and then handed to
 * {@code LogicalInverseRewriter.rewrite} so that the wrapped predicate does
 * not include the not() operator. If the rewritten predicate is not equal
 * to the one supplied, the rewritten form is logged as well.
 *
 * @param filterPredicate the predicate to wrap; validated with {@code checkNotNull}
 * @return a {@code FilterPredicateCompat} wrapping the rewritten predicate
 */
public static Filter get(FilterPredicate filterPredicate) {
    checkNotNull(filterPredicate, "filterPredicate");
    LOG.info("Filtering using predicate: {}", filterPredicate);

    // Rewrite first; everything below works on the not()-free form.
    final FilterPredicate withoutNot = LogicalInverseRewriter.rewrite(filterPredicate);

    // Only mention the rewritten form when the rewrite actually changed something.
    final boolean rewriteChangedPredicate = !filterPredicate.equals(withoutNot);
    if (rewriteChangedPredicate) {
        LOG.info("Predicate has been collapsed to: {}", withoutNot);
    }

    return new FilterPredicateCompat(withoutNot);
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 52 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

The method testEqBinary of the class DictionaryFilterTest.

/**
 * Exercises {@code canDrop} with equality predicates on the
 * {@code binary_field} column: a lower case value must not drop the block,
 * an upper case value must drop it, and a null value must not drop it.
 */
@Test
public void testEqBinary() throws Exception {
    final BinaryColumn column = binaryColumn("binary_field");

    // Lower case value: the block has to be kept.
    final FilterPredicate lowerCaseEq = eq(column, Binary.fromString("c"));
    final boolean dropsForLowerCase = canDrop(lowerCaseEq, ccmd, dictionaries);
    assertFalse("Should not drop block for lower case letters", dropsForLowerCase);

    // Upper case value: the block can be dropped.
    final boolean dropsForUpperCase = canDrop(eq(column, Binary.fromString("A")), ccmd, dictionaries);
    assertTrue("Should drop block for upper case letters", dropsForUpperCase);

    // Equality with null: the block has to be kept.
    final boolean dropsForNull = canDrop(eq(column, null), ccmd, dictionaries);
    assertFalse("Should not drop block for null", dropsForNull);
}
Also used : BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 53 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

The method testComplex of the class TestRecordLevelFilters.

/**
 * Reads the phone book file through the compound predicate
 * {@code (location.lon > 150.0 AND location.lat != null) OR name == "alice"}
 * and checks the result against the same rule written as a plain Java
 * {@code UserFilter}.
 */
@Test
public void testComplex() throws Exception {
    final BinaryColumn nameColumn = binaryColumn("name");
    final DoubleColumn lonColumn = doubleColumn("location.lon");
    final DoubleColumn latColumn = doubleColumn("location.lat");

    // Build the predicate in named steps: (lon > 150 AND lat present) OR name is alice.
    final FilterPredicate lonAbove150WithLat = and(gt(lonColumn, 150.0), notEq(latColumn, null));
    final FilterPredicate nameIsAlice = eq(nameColumn, Binary.fromString("alice"));
    final FilterPredicate pred = or(lonAbove150WithLat, nameIsAlice);

    final List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));

    assertFilter(found, new UserFilter() {

        @Override
        public boolean keep(User u) {
            final boolean isAlice = "alice".equals(u.getName());

            // Without a location the coordinate clause cannot match; only the name decides.
            if (u.getLocation() == null) {
                return isAlice;
            }

            final Double lat = u.getLocation().getLat();
            final Double lon = u.getLocation().getLon();
            final boolean coordinatesMatch = lon != null && lon > 150.0 && lat != null;
            return coordinatesMatch || isAlice;
        }
    });
}
Also used : Group(org.apache.parquet.example.data.Group) DoubleColumn(org.apache.parquet.filter2.predicate.Operators.DoubleColumn) User(org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User) BinaryColumn(org.apache.parquet.filter2.predicate.Operators.BinaryColumn) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 54 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.

The method testOr of the class TestStatisticsFilter.

/**
 * Checks {@code canDrop} for every pairing of two predicates under
 * {@code or}: the combination is expected to be droppable only when both
 * operands are the {@code eq(intColumn, 9)} predicate, and not droppable as
 * soon as {@code eq(doubleColumn, 50D)} appears on either side.
 */
@Test
public void testOr() {
    final FilterPredicate droppable = eq(intColumn, 9);
    final FilterPredicate notDroppable = eq(doubleColumn, 50D);

    final boolean bothDroppable = canDrop(or(droppable, droppable), columnMetas);
    assertTrue(bothDroppable);

    final boolean rightNotDroppable = canDrop(or(droppable, notDroppable), columnMetas);
    assertFalse(rightNotDroppable);

    final boolean leftNotDroppable = canDrop(or(notDroppable, droppable), columnMetas);
    assertFalse(leftNotDroppable);

    final boolean neitherDroppable = canDrop(or(notDroppable, notDroppable), columnMetas);
    assertFalse(neitherDroppable);
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Test(org.junit.Test)

Example 55 with FilterPredicate

use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.

The method getIsEqualFilter of the class QueryGenerator.

/**
 * Builds a Parquet equality predicate for a column from already-converted
 * values.
 *
 * <p>The column's paths are looked up with {@code schemaUtils.getPaths}; when
 * that lookup returns {@code null} the column name itself is used as the only
 * path. For each path an {@code eq} predicate is created against the value at
 * the same index of {@code parquetObjects}, choosing the column type from the
 * runtime type of that value, and the per-path predicates are combined with
 * {@code and}.
 *
 * <p>NOTE(review): the loop reads {@code parquetObjects[i]} for every path
 * index, so it assumes {@code parquetObjects} has at least as many entries as
 * there are paths - confirm against the callers.
 *
 * @param colName        the column name, also used as the path when no paths are found
 * @param parquetObjects the values to compare against, one per path
 * @param group          the group passed to the path lookup
 * @return the combined predicate, or {@code null} when a value (including a
 *         {@code null} value) has no natively supported type, or when there
 *         are no paths
 */
private FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = eq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Boolean) {
            tempFilter = eq(booleanColumn(path), (Boolean) value);
        } else if (value instanceof Double) {
            tempFilter = eq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = eq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = eq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = eq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date is a subclass of java.util.Date, so this branch also
            // covers it; a separate java.sql.Date branch after this one could
            // never be reached.
            tempFilter = eq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            tempFilter = eq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            // A null value matches none of the instanceof checks above; guard it
            // here so the warning itself cannot throw a NullPointerException.
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn(typeName + " is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.");
            return null;
        }
        filter = null == filter ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)76 Test (org.junit.Test)50 HashMap (java.util.HashMap)33 MessageType (org.apache.parquet.schema.MessageType)33 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)32 SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)25 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)12 BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn)8 ArrayList (java.util.ArrayList)5 List (java.util.List)5 Group (org.apache.parquet.example.data.Group)5 Configuration (org.apache.hadoop.conf.Configuration)4 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)4 User (org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User)4 Predicate (java.util.function.Predicate)3 Path (org.apache.hadoop.fs.Path)3 Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair)3 TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate)3 HashSet (java.util.HashSet)2 GenericRecord (org.apache.avro.generic.GenericRecord)2