Use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.
The class FilterPredicateLeafBuilder, method buildPredicate.
/**
 * Builds a filter predicate from multiple constants.
 *
 * @param op IN or BETWEEN
 * @param literals the constant values (one or more for IN, exactly two for BETWEEN)
 * @param columnName the name of the column the predicate applies to
 * @return the combined FilterPredicate
 */
public FilterPredicate buildPredicate(PredicateLeaf.Operator op, List<Object> literals, String columnName) throws Exception {
  FilterPredicate result = null;
  switch (op) {
    case IN:
      // Unroll IN into a chain of ORed equality predicates, one per literal.
      for (Object literal : literals) {
        if (result == null) {
          result = buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName);
        } else {
          result = or(result, buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName));
        }
      }
      return result;
    case BETWEEN:
      if (literals.size() != 2) {
        throw new RuntimeException("Not able to build 'between' operation filter with " + literals +
            " which needs two literals");
      }
      Object min = literals.get(0);
      Object max = literals.get(1);
      // BETWEEN min AND max == NOT(value < min) AND (value <= max).
      FilterPredicate lt = not(buildPredict(PredicateLeaf.Operator.LESS_THAN, min, columnName));
      FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS, max, columnName);
      result = FilterApi.and(gt, lt);
      return result;
    default:
      throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
  }
}
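
For illustration, here is a minimal standalone sketch of the two predicate shapes this method produces, built directly with parquet's FilterApi. The column name "id" and the literal values are assumptions for the example, not taken from the Hive code above.

import static org.apache.parquet.filter2.predicate.FilterApi.*;

import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;

public class PredicateShapes {
  public static void main(String[] args) {
    IntColumn id = intColumn("id");
    // IN (1, 2, 3): the loop above produces a left-leaning OR chain of equalities.
    FilterPredicate in = or(or(eq(id, 1), eq(id, 2)), eq(id, 3));
    // BETWEEN 10 AND 20: and(value <= 20, not(value < 10)).
    FilterPredicate between = and(lteq(id, 20), not(lt(id, 10)));
    System.out.println(in);
    System.out.println(between);
  }
}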
Use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
The class ParquetLoader, method setInput.
private void setInput(String location, Job job) throws IOException {
  this.setLocationHasBeenCalled = true;
  this.location = location;
  setInputPaths(job, location);
  // Stored up front because the initial value comes from the constructor,
  // not file metadata or the Pig framework, and would get overwritten in initSchema().
  if (UDFContext.getUDFContext().isFrontend()) {
    storeInUDFContext(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));
  }
  schema = PigSchemaConverter.parsePigSchema(getPropertyFromUDFContext(PARQUET_PIG_SCHEMA));
  requiredFieldList = PigSchemaConverter.deserializeRequiredFieldList(getPropertyFromUDFContext(PARQUET_PIG_REQUIRED_FIELDS));
  columnIndexAccess = Boolean.parseBoolean(getPropertyFromUDFContext(PARQUET_COLUMN_INDEX_ACCESS));
  initSchema(job);
  if (UDFContext.getUDFContext().isFrontend()) {
    // Setting for task-side loading via initSchema()
    storeInUDFContext(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
    storeInUDFContext(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
  }
  // Used by the task-side loader via TupleReadSupport
  getConfiguration(job).set(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
  getConfiguration(job).set(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
  getConfiguration(job).set(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));
  FilterPredicate filterPredicate = (FilterPredicate) getFromUDFContext(ParquetInputFormat.FILTER_PREDICATE);
  if (filterPredicate != null) {
    ParquetInputFormat.setFilterPredicate(getConfiguration(job), filterPredicate);
  }
}
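
The last few lines push a previously stored predicate into the job configuration. A minimal sketch of that hand-off in isolation, assuming a plain Hadoop Configuration and an illustrative "ts" column (both are assumptions, not part of ParquetLoader):

import static org.apache.parquet.filter2.predicate.FilterApi.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.ParquetInputFormat;

public class FilterHandOff {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    FilterPredicate pred = gtEq(longColumn("ts"), 1000L);
    // Serializes the predicate into the configuration on the frontend...
    ParquetInputFormat.setFilterPredicate(conf, pred);
    // ...and the task side reads the same predicate back.
    System.out.println(ParquetInputFormat.getFilterPredicate(conf));
  }
}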
Use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
The class TestFilterApiMethods, method testSerializable.
@Test
public void testSerializable() throws Exception {
  BinaryColumn binary = binaryColumn("foo");
  // intColumn, longColumn, and predicate are fields defined elsewhere in the test class.
  FilterPredicate p = and(or(and(userDefined(intColumn, DummyUdp.class), predicate), eq(binary, Binary.fromString("hi"))), userDefined(longColumn, new IsMultipleOf(7)));
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  ObjectOutputStream oos = new ObjectOutputStream(baos);
  oos.writeObject(p);
  oos.close();
  ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()));
  FilterPredicate read = (FilterPredicate) is.readObject();
  assertEquals(p, read);
}
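
The round trip works because the predicate classes built by FilterApi are Serializable, which is exactly what this test verifies. A self-contained variant of the same check, with an illustrative "x" column standing in for the test-class fields:

import static org.apache.parquet.filter2.predicate.FilterApi.*;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import org.apache.parquet.filter2.predicate.FilterPredicate;

public class RoundTripSketch {
  public static void main(String[] args) throws Exception {
    FilterPredicate p = and(eq(intColumn("x"), 7), not(eq(intColumn("x"), 9)));
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
      oos.writeObject(p);
    }
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
      // Predicate equality is structural, so the deserialized copy compares equal.
      System.out.println(p.equals(ois.readObject()));
    }
  }
}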
Use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
The class RowGroupFilter, method visit.
@Override
public List<BlockMetaData> visit(FilterCompat.FilterPredicateCompat filterPredicateCompat) {
  FilterPredicate filterPredicate = filterPredicateCompat.getFilterPredicate();
  // Check that the schema of the filter matches the schema of the file.
  SchemaCompatibilityValidator.validate(filterPredicate, schema);
  List<BlockMetaData> filteredBlocks = new ArrayList<BlockMetaData>();
  for (BlockMetaData block : blocks) {
    boolean drop = false;
    if (levels.contains(FilterLevel.STATISTICS)) {
      drop = StatisticsFilter.canDrop(filterPredicate, block.getColumns());
    }
    if (!drop && levels.contains(FilterLevel.DICTIONARY)) {
      drop = DictionaryFilter.canDrop(filterPredicate, block.getColumns(), reader.getDictionaryReader(block));
    }
    if (!drop) {
      filteredBlocks.add(block);
    }
  }
  return filteredBlocks;
}
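
Callers normally do not invoke the visitor directly; they go through the static entry point, which wraps the predicate in a FilterCompat.Filter first. A sketch of that call, assuming a ParquetFileReader and a FilterPredicate are already in scope (the method and variable names here are illustrative):

import java.util.Arrays;
import java.util.List;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.compat.RowGroupFilter;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;

public class RowGroupFilterSketch {
  static List<BlockMetaData> keptBlocks(ParquetFileReader reader, FilterPredicate pred) {
    FilterCompat.Filter filter = FilterCompat.get(pred);
    // Apply both pruning levels shown in the visit method above.
    return RowGroupFilter.filterRowGroups(
        Arrays.asList(RowGroupFilter.FilterLevel.STATISTICS, RowGroupFilter.FilterLevel.DICTIONARY),
        filter,
        reader.getFooter().getBlocks(),
        reader);
  }
}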
Use of org.apache.parquet.filter2.predicate.FilterPredicate in project parquet-mr by apache.
The class DictionaryFilterTest, method testInverseUdp.
@Test
public void testInverseUdp() throws Exception {
  InInt32UDP droppable = new InInt32UDP(ImmutableSet.of(42));
  InInt32UDP undroppable = new InInt32UDP(ImmutableSet.of(205));
  Set<Integer> allValues = ImmutableSet.copyOf(Arrays.asList(ArrayUtils.toObject(intValues)));
  InInt32UDP completeMatch = new InInt32UDP(allValues);
  FilterPredicate inverse = LogicalInverseRewriter.rewrite(not(userDefined(intColumn("int32_field"), droppable)));
  FilterPredicate inverse1 = LogicalInverseRewriter.rewrite(not(userDefined(intColumn("int32_field"), undroppable)));
  FilterPredicate inverse2 = LogicalInverseRewriter.rewrite(not(userDefined(intColumn("int32_field"), completeMatch)));
  assertFalse("Should not drop block for inverse of non-matching UDP", canDrop(inverse, ccmd, dictionaries));
  assertFalse("Should not drop block for inverse of UDP with some matches", canDrop(inverse1, ccmd, dictionaries));
  assertTrue("Should drop block for inverse of UDP with all matches", canDrop(inverse2, ccmd, dictionaries));
}
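
InInt32UDP is a helper defined inside the test. A rough self-contained equivalent of such a user-defined predicate is sketched below; the class name MyInUdp is an assumption, and the always-false drop decisions are deliberately conservative rather than the test's real logic:

import java.io.Serializable;
import java.util.Set;
import org.apache.parquet.filter2.predicate.Statistics;
import org.apache.parquet.filter2.predicate.UserDefinedPredicate;

public class MyInUdp extends UserDefinedPredicate<Integer> implements Serializable {
  private final Set<Integer> values;

  public MyInUdp(Set<Integer> values) {
    this.values = values;
  }

  @Override
  public boolean keep(Integer value) {
    // Keep records whose value is in the set.
    return values.contains(value);
  }

  @Override
  public boolean canDrop(Statistics<Integer> statistics) {
    // Conservative: never claim a whole block can be dropped.
    return false;
  }

  @Override
  public boolean inverseCanDrop(Statistics<Integer> statistics) {
    // Conservative for the not(userDefined(...)) case as well.
    return false;
  }
}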