
Example 16 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.

From class DictionaryFilterTest, method testAnd.

@Test
public void testAnd() throws Exception {
    BinaryColumn col = binaryColumn("binary_field");
    // both evaluate to false (no upper-case letters are in the dictionary)
    FilterPredicate B = eq(col, Binary.fromString("B"));
    FilterPredicate C = eq(col, Binary.fromString("C"));
    // both evaluate to true (all lower-case letters are in the dictionary)
    FilterPredicate x = eq(col, Binary.fromString("x"));
    FilterPredicate y = eq(col, Binary.fromString("y"));
    assertTrue("Should drop when either predicate must be false", canDrop(and(B, y), ccmd, dictionaries));
    assertTrue("Should drop when either predicate must be false", canDrop(and(x, C), ccmd, dictionaries));
    assertTrue("Should drop when either predicate must be false", canDrop(and(B, C), ccmd, dictionaries));
    assertFalse("Should not drop when either predicate could be true", canDrop(and(x, y), ccmd, dictionaries));
}
Also used: BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn), FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate), Test (org.junit.Test)
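For orientation, here is a minimal, self-contained sketch of how a conjunction like the ones tested above can be built through the public FilterApi and wrapped for a reader. The column "binary_field" comes from the test; "int_field" and the class name are illustrative assumptions, not part of parquet-mr.

import static org.apache.parquet.filter2.predicate.FilterApi.and;
import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.eq;
import static org.apache.parquet.filter2.predicate.FilterApi.gt;
import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;

import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.BinaryColumn;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;
import org.apache.parquet.io.api.Binary;

public class AndPredicateSketch {
    public static FilterCompat.Filter buildFilter() {
        BinaryColumn name = binaryColumn("binary_field"); // column used in the test above
        IntColumn count = intColumn("int_field");         // hypothetical column, for illustration only
        // Both sub-predicates must hold for a record to be kept; at record level
        // this conjunction is evaluated as an IncrementallyUpdatedFilterPredicate.And.
        FilterPredicate pred = and(eq(name, Binary.fromString("x")), gt(count, 10));
        // FilterCompat.get wraps the predicate so it can be passed to a ParquetReader.
        return FilterCompat.get(pred);
    }
}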

Example 17 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

From class TestConvertAstToSearchArg, method testExpression10.

@Test
public void testExpression10() throws Exception {
    /* id >= 10 and not (10 > id) */
    String exprStr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?> \n" + "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "  <void property=\"children\"> \n" + "   <object class=\"java.util.ArrayList\"> \n" + "    <void method=\"add\"> \n" + "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "      <void property=\"children\"> \n" + "       <object class=\"java.util.ArrayList\"> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "          <void property=\"column\"> \n" + "           <string>id</string> \n" + "          </void> \n" + "          <void property=\"tabAlias\"> \n" + "           <string>orc_people</string> \n" + "          </void> \n" + "          <void property=\"typeInfo\"> \n" + "           <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + "            <void property=\"typeName\"> \n" + "             <string>int</string> \n" + "            </void> \n" + "           </object> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo0\"/> \n" + "          </void> \n" + "          <void property=\"value\"> \n" + "           <int>10</int> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "       </object> \n" + "      </void> \n" + "      <void property=\"genericUDF\"> \n" + "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan\"/> \n" + "      </void> \n" + "      <void property=\"typeInfo\"> \n" + "       <object id=\"PrimitiveTypeInfo1\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + "        <void property=\"typeName\"> \n" + "         <string>boolean</string> \n" + "        </void> \n" + "       </object> \n" + "      </void> \n" + "     </object> \n" + "    </void> \n" + "    <void method=\"add\"> \n" + "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "      <void property=\"children\"> \n" + "       <object class=\"java.util.ArrayList\"> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "          <void property=\"children\"> \n" + "           <object class=\"java.util.ArrayList\"> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "              <void property=\"column\"> \n" + "               <string>id</string> \n" + "              </void> \n" + "              <void property=\"tabAlias\"> \n" + "               <string>orc_people</string> \n" + "              </void> \n" + "              <void property=\"typeInfo\"> \n" + "               <object idref=\"PrimitiveTypeInfo0\"/> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "              <void property=\"typeInfo\"> \n" + "               <object idref=\"PrimitiveTypeInfo0\"/> \n" + "              </void> \n" + "              <void property=\"value\"> \n" + 
"               <int>10</int> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "           </object> \n" + "          </void> \n" + "          <void property=\"genericUDF\"> \n" + "           <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan\"/> \n" + "          </void> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo1\"/> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "       </object> \n" + "      </void> \n" + "      <void property=\"genericUDF\"> \n" + "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot\"/> \n" + "      </void> \n" + "      <void property=\"typeInfo\"> \n" + "       <object idref=\"PrimitiveTypeInfo1\"/> \n" + "      </void> \n" + "     </object> \n" + "    </void> \n" + "   </object> \n" + "  </void> \n" + "  <void property=\"genericUDF\"> \n" + "   <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + "  </void> \n" + "  <void property=\"typeInfo\"> \n" + "   <object idref=\"PrimitiveTypeInfo1\"/> \n" + "  </void> \n" + " </object> \n" + "</java>";
    SearchArgumentImpl sarg = (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr));
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals(1, leaves.size());
    MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 id;" + " required binary first_name; }");
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
    assertEquals(expected, p.toString());
    assertEquals(PredicateLeaf.Type.LONG, leaves.get(0).getType());
    assertEquals(PredicateLeaf.Operator.LESS_THAN, leaves.get(0).getOperator());
    assertEquals("id", leaves.get(0).getColumnName());
    assertEquals(10L, leaves.get(0).getLiteral());
    assertEquals("(and (not leaf-0) (not leaf-0))", sarg.getExpression().toString());
    assertNoSharedNodes(sarg.getExpression(), Sets.<ExpressionTree>newIdentityHashSet());
    assertEquals(TruthValue.NO, sarg.evaluate(values(TruthValue.YES)));
    assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.NO)));
    assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NULL)));
    assertEquals(TruthValue.NO_NULL, sarg.evaluate(values(TruthValue.YES_NULL)));
    assertEquals(TruthValue.YES_NULL, sarg.evaluate(values(TruthValue.NO_NULL)));
    assertEquals(TruthValue.YES_NO, sarg.evaluate(values(TruthValue.YES_NO)));
    assertEquals(TruthValue.YES_NO_NULL, sarg.evaluate(values(TruthValue.YES_NO_NULL)));
}
Also used: FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate), MessageType (org.apache.parquet.schema.MessageType), Test (org.junit.Test)
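The XML above is only a serialized ExprNodeGenericFuncDesc tree. For comparison, here is a hedged sketch of building the same "id >= 10 and not (10 > id)" predicate directly with SearchArgumentFactory and converting it, as the later examples do; package names follow the imports used in these tests, though exact locations can vary by Hive version, and the class name is illustrative.

import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class DirectSearchArgumentSketch {
    public static FilterPredicate build() {
        // id >= 10 is rewritten as not(id < 10), so the whole expression becomes
        // and(not(lt(id, 10)), not(lt(id, 10))), matching the assertion in the test above.
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
                .startAnd()
                .startNot().lessThan("id", PredicateLeaf.Type.LONG, 10L).end()
                .startNot().lessThan("id", PredicateLeaf.Type.LONG, 10L).end()
                .end()
                .build();
        MessageType schema = MessageTypeParser.parseMessageType(
                "message test { required int32 id; }");
        return ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    }
}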

Example 18 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

From class TestParquetFilterPredicate, method testFilterColumnsThatDoNoExistOnSchema.

@Test
public void testFilterColumnsThatDoNoExistOnSchema() {
    MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required binary stinger; }");
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startNot()
            .startOr()
            .isNull("a", PredicateLeaf.Type.LONG)
            // Column will be removed from filter
            .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
            // Column will be removed from filter
            .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
            .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
            .end()
            .end()
            .build();
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))";
    assertEquals(expected, p.toString());
}
Also used: SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument), FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate), MessageType (org.apache.parquet.schema.MessageType), Test (org.junit.Test)
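A small, hypothetical sketch of the pruning behaviour this test exercises: in an AND context, a leaf that refers to a column absent from the Parquet MessageType is expected to be dropped by the converter rather than failing the conversion. The column name "missing" and the class name are illustrative, not taken from the Hive test.

import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class MissingColumnSketch {
    public static void main(String[] args) {
        MessageType schema = MessageTypeParser.parseMessageType(
                "message test { required int32 a; }");
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
                .startAnd()
                .equals("a", PredicateLeaf.Type.LONG, 5L)
                .equals("missing", PredicateLeaf.Type.LONG, 1L) // not present in the schema
                .end()
                .build();
        // The leaf on "missing" should be pruned from the AND, leaving only the
        // predicate on column a (mirroring the removal of y and z above).
        FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
        System.out.println(p);
    }
}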

Example 19 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

From class TestParquetFilterPredicate, method testFilterFloatColumns.

@Test
public void testFilterFloatColumns() {
    MessageType schema = MessageTypeParser.parseMessageType("message test {  required float a; required int32 b; }");
    SearchArgument sarg = SearchArgumentFactory.newBuilder().startNot().startOr().isNull("a", PredicateLeaf.Type.FLOAT).between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3).in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L).end().end().build();
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    String expected = "and(and(not(eq(a, null)), not(and(lteq(a, 20.3), not(lt(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
    assertEquals(expected, p.toString());
}
Also used: SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument), FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate), MessageType (org.apache.parquet.schema.MessageType), Test (org.junit.Test)

Example 20 with And

Use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

From class TestParquetFilterPredicate, method testFilterBetween.

@Test
public void testFilterBetween() {
    MessageType schema = MessageTypeParser.parseMessageType("message test {  required int32 bCol; }");
    SearchArgument sarg = SearchArgumentFactory.newBuilder().between("bCol", PredicateLeaf.Type.LONG, 1L, 5L).build();
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    String expected = "and(lteq(bCol, 5), not(lt(bCol, 1)))";
    assertEquals(expected, p.toString());
    sarg = SearchArgumentFactory.newBuilder().between("bCol", PredicateLeaf.Type.LONG, 5L, 1L).build();
    p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    expected = "and(lteq(bCol, 1), not(lt(bCol, 5)))";
    assertEquals(expected, p.toString());
    sarg = SearchArgumentFactory.newBuilder().between("bCol", PredicateLeaf.Type.LONG, 1L, 1L).build();
    p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    expected = "and(lteq(bCol, 1), not(lt(bCol, 1)))";
    assertEquals(expected, p.toString());
}
Also used: SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument), FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate), MessageType (org.apache.parquet.schema.MessageType), Test (org.junit.Test)
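As the expected strings show, between(lo, hi) is translated to and(lteq(col, hi), not(lt(col, lo))). Below is a minimal sketch of constructing that same shape directly with FilterApi; the class name is illustrative and the snippet is not part of the Hive test.

import static org.apache.parquet.filter2.predicate.FilterApi.and;
import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.lt;
import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
import static org.apache.parquet.filter2.predicate.FilterApi.not;

import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;

public class BetweenSketch {
    // Equivalent of "bCol between 1 and 5" after conversion:
    // and(lteq(bCol, 5), not(lt(bCol, 1)))
    public static FilterPredicate betweenOneAndFive() {
        IntColumn bCol = intColumn("bCol");
        return and(ltEq(bCol, 5), not(lt(bCol, 1)));
    }
}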

Aggregations

Test (org.junit.Test): 21
FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 19
MessageType (org.apache.parquet.schema.MessageType): 11
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument): 5
ValueInspector (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector): 4
BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn): 3
And (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And): 2
Or (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or): 2
ByteArrayInputStream (java.io.ByteArrayInputStream): 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
ObjectInputStream (java.io.ObjectInputStream): 1
ObjectOutputStream (java.io.ObjectOutputStream): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 1
DoubleStatistics (org.apache.parquet.column.statistics.DoubleStatistics): 1
IntStatistics (org.apache.parquet.column.statistics.IntStatistics): 1
Group (org.apache.parquet.example.data.Group): 1
RecordFilter (org.apache.parquet.filter.RecordFilter): 1
UnboundRecordFilter (org.apache.parquet.filter.UnboundRecordFilter): 1
Filter (org.apache.parquet.filter2.compat.FilterCompat.Filter): 1