use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project hive by apache.
the class TestParquetFilterPredicate method testFilterFloatColumns.
@Test
public void testFilterFloatColumns() {
MessageType schema = MessageTypeParser.parseMessageType("message test { required float a; required int32 b; }");
SearchArgument sarg = SearchArgumentFactory.newBuilder().startNot().startOr().isNull("a", PredicateLeaf.Type.FLOAT).between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3).in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L).end().end().build();
FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String expected = "and(and(not(eq(a, null)), not(and(lteq(a, 20.3), not(lt(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
assertEquals(expected, p.toString());
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project hive by apache.
the class ParquetRecordReaderBase method getSplit.
/**
* gets a ParquetInputSplit corresponding to a split given by Hive
*
* @param oldSplit The split given by Hive
* @param conf The JobConf of the Hive job
* @return a ParquetInputSplit corresponding to the oldSplit
* @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file
*/
@SuppressWarnings("deprecation")
protected ParquetInputSplit getSplit(final org.apache.hadoop.mapred.InputSplit oldSplit, final JobConf conf) throws IOException {
ParquetInputSplit split;
if (oldSplit instanceof FileSplit) {
final Path finalPath = ((FileSplit) oldSplit).getPath();
jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());
// TODO enable MetadataFilter by using readFooter(Configuration configuration, Path file,
// MetadataFilter filter) API
final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
final ReadSupport.ReadContext readContext = new DataWritableReadSupport().init(new InitContext(jobConf, null, fileMetaData.getSchema()));
// Compute stats
for (BlockMetaData bmd : blocks) {
serDeStats.setRowCount(serDeStats.getRowCount() + bmd.getRowCount());
serDeStats.setRawDataSize(serDeStats.getRawDataSize() + bmd.getTotalByteSize());
}
schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata().get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount();
final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>();
final long splitStart = ((FileSplit) oldSplit).getStart();
final long splitLength = ((FileSplit) oldSplit).getLength();
for (final BlockMetaData block : blocks) {
final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
splitGroup.add(block);
}
}
if (splitGroup.isEmpty()) {
LOG.warn("Skipping split, could not find row group in: " + oldSplit);
return null;
}
FilterCompat.Filter filter = setFilter(jobConf, fileMetaData.getSchema());
if (filter != null) {
filtedBlocks = RowGroupFilter.filterRowGroups(filter, splitGroup, fileMetaData.getSchema());
if (filtedBlocks.isEmpty()) {
LOG.debug("All row groups are dropped due to filter predicates");
return null;
}
long droppedBlocks = splitGroup.size() - filtedBlocks.size();
if (droppedBlocks > 0) {
LOG.debug("Dropping " + droppedBlocks + " row groups that do not pass filter predicate");
}
} else {
filtedBlocks = splitGroup;
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) {
skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr");
}
split = new ParquetInputSplit(finalPath, splitStart, splitLength, oldSplit.getLocations(), filtedBlocks, readContext.getRequestedSchema().toString(), fileMetaData.getSchema().toString(), fileMetaData.getKeyValueMetaData(), readContext.getReadSupportMetadata());
return split;
} else {
throw new IllegalArgumentException("Unknown split type: " + oldSplit);
}
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project hive by apache.
the class TestConvertAstToSearchArg method testExpression1.
@Test
public void testExpression1() throws Exception {
// first_name = 'john' or
// 'greg' < first_name or
// 'alan' > first_name or
// id > 12 or
// 13 < id or
// id < 15 or
// 16 > id or
// (id <=> 30 and first_name <=> 'owen')
String exprStr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?> \n" + "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>first_name</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + " <void property=\"typeName\"> \n" + " <string>string</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <string>john</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object id=\"PrimitiveTypeInfo1\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + " <void property=\"typeName\"> \n" + " <string>boolean</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <string>greg</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>first_name</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <string>alan</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>first_name</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>id</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object id=\"PrimitiveTypeInfo2\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + " <void property=\"typeName\"> \n" + " <string>int</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>12</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>13</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>id</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>id</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>15</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>16</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>id</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>id</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>30</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>first_name</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <string>owen</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " </object> \n" + "</java> \n";
SearchArgumentImpl sarg = (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr));
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 id;" + " required binary first_name; }");
FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
String[] conditions = new String[] { "eq(first_name, Binary{\"john\"})", /* first_name = 'john' */
"not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */
"lt(first_name, Binary{\"alan\"})", /* 'alan' > first_name */
"not(lteq(id, 12))", /* id > 12 or */
"not(lteq(id, 13))", /* 13 < id or */
"lt(id, 15)", /* id < 15 or */
"lt(id, 16)", /* 16 > id or */
"eq(id, 30)", /* id <=> 30 */
"eq(first_name, Binary{\"owen\"})" /* first_name <=> 'owen' */
};
String expected = String.format("and(or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %8$s), " + "or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %9$s))", conditions);
assertEquals(expected, p.toString());
PredicateLeaf leaf = leaves.get(0);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
assertEquals("john", leaf.getLiteral());
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
assertEquals("greg", leaf.getLiteral());
leaf = leaves.get(2);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
assertEquals("alan", leaf.getLiteral());
leaf = leaves.get(3);
assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
assertEquals(12L, leaf.getLiteral());
leaf = leaves.get(4);
assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
assertEquals(13L, leaf.getLiteral());
leaf = leaves.get(5);
assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
assertEquals(15L, leaf.getLiteral());
leaf = leaves.get(6);
assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
assertEquals(16L, leaf.getLiteral());
leaf = leaves.get(7);
assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
assertEquals(30L, leaf.getLiteral());
leaf = leaves.get(8);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
assertEquals("owen", leaf.getLiteral());
assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + " (not leaf-4) leaf-5 leaf-6 leaf-7)" + " (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + " (not leaf-4) leaf-5 leaf-6 leaf-8))", sarg.getExpression().toString());
assertNoSharedNodes(sarg.getExpression(), Sets.<ExpressionTree>newIdentityHashSet());
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project parquet-mr by apache.
the class ParquetLoader method buildFilter.
private FilterPredicate buildFilter(Expression e) {
OpType op = e.getOpType();
if (e instanceof BinaryExpression) {
Expression lhs = ((BinaryExpression) e).getLhs();
Expression rhs = ((BinaryExpression) e).getRhs();
switch(op) {
case OP_AND:
return and(buildFilter(lhs), buildFilter(rhs));
case OP_OR:
return or(buildFilter(lhs), buildFilter(rhs));
case OP_BETWEEN:
BetweenExpression between = (BetweenExpression) rhs;
return and(buildFilter(OpType.OP_GE, (Column) lhs, (Const) between.getLower()), buildFilter(OpType.OP_LE, (Column) lhs, (Const) between.getUpper()));
case OP_IN:
FilterPredicate current = null;
for (Object value : ((InExpression) rhs).getValues()) {
FilterPredicate next = buildFilter(OpType.OP_EQ, (Column) lhs, (Const) value);
if (current != null) {
current = or(current, next);
} else {
current = next;
}
}
return current;
}
if (lhs instanceof Column && rhs instanceof Const) {
return buildFilter(op, (Column) lhs, (Const) rhs);
} else if (lhs instanceof Const && rhs instanceof Column) {
return buildFilter(op, (Column) rhs, (Const) lhs);
}
} else if (e instanceof UnaryExpression && op == OpType.OP_NOT) {
return LogicalInverseRewriter.rewrite(not(buildFilter(((UnaryExpression) e).getExpression())));
}
throw new RuntimeException("Could not build filter for expression: " + e);
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project parquet-mr by apache.
the class TestRecordLevelFilters method testComplex.
@Test
public void testComplex() throws Exception {
BinaryColumn name = binaryColumn("name");
DoubleColumn lon = doubleColumn("location.lon");
DoubleColumn lat = doubleColumn("location.lat");
FilterPredicate pred = or(and(gt(lon, 150.0), notEq(lat, null)), eq(name, Binary.fromString("alice")));
List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));
assertFilter(found, new UserFilter() {
@Override
public boolean keep(User u) {
String name = u.getName();
Double lat = null;
Double lon = null;
if (u.getLocation() != null) {
lat = u.getLocation().getLat();
lon = u.getLocation().getLon();
}
return (lon != null && lon > 150.0 && lat != null) || "alice".equals(name);
}
});
}
Aggregations