Example 6 with Or

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project hive by apache.

the class TestConvertAstToSearchArg method testExpression2.

@Test
public void testExpression2() throws Exception {
    /* first_name is null or
       first_name <> 'sue' or
       id >= 12 or
       id <= 4; */
    String exprStr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?> \n" + "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "  <void property=\"children\"> \n" + "   <object class=\"java.util.ArrayList\"> \n" + "    <void method=\"add\"> \n" + "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "      <void property=\"children\"> \n" + "       <object class=\"java.util.ArrayList\"> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "          <void property=\"children\"> \n" + "           <object class=\"java.util.ArrayList\"> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "              <void property=\"children\"> \n" + "               <object class=\"java.util.ArrayList\"> \n" + "                <void method=\"add\"> \n" + "                 <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "                  <void property=\"column\"> \n" + "                   <string>first_name</string> \n" + "                  </void> \n" + "                  <void property=\"tabAlias\"> \n" + "                   <string>orc_people</string> \n" + "                  </void> \n" + "                  <void property=\"typeInfo\"> \n" + "                   <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + "                    <void property=\"typeName\"> \n" + "                     <string>string</string> \n" + "                    </void> \n" + "                   </object> \n" + "                  </void> \n" + "                 </object> \n" + "                </void> \n" + "               </object> \n" + "              </void> \n" + "              <void property=\"genericUDF\"> \n" + "               <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull\"/> \n" + "              </void> \n" + "              <void property=\"typeInfo\"> \n" + "               <object id=\"PrimitiveTypeInfo1\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + "                <void property=\"typeName\"> \n" + "                 <string>boolean</string> \n" + "                </void> \n" + "               </object> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "              <void property=\"children\"> \n" + "               <object class=\"java.util.ArrayList\"> \n" + "                <void method=\"add\"> \n" + "                 <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "                  <void property=\"column\"> \n" + "                   <string>first_name</string> \n" + "                  </void> \n" + "                  <void property=\"tabAlias\"> \n" + "                   <string>orc_people</string> \n" + "                  </void> \n" + "                  <void property=\"typeInfo\"> \n" + "                   <object idref=\"PrimitiveTypeInfo0\"/> \n" + "                  </void> \n" + "                 </object> \n" + "                </void> \n" + "                <void method=\"add\"> \n" + "                 <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> 
\n" + "                  <void property=\"typeInfo\"> \n" + "                   <object idref=\"PrimitiveTypeInfo0\"/> \n" + "                  </void> \n" + "                  <void property=\"value\"> \n" + "                   <string>sue</string> \n" + "                  </void> \n" + "                 </object> \n" + "                </void> \n" + "               </object> \n" + "              </void> \n" + "              <void property=\"genericUDF\"> \n" + "               <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual\"/> \n" + "              </void> \n" + "              <void property=\"typeInfo\"> \n" + "               <object idref=\"PrimitiveTypeInfo1\"/> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "           </object> \n" + "          </void> \n" + "          <void property=\"genericUDF\"> \n" + "           <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + "          </void> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo1\"/> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "          <void property=\"children\"> \n" + "           <object class=\"java.util.ArrayList\"> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "              <void property=\"column\"> \n" + "               <string>id</string> \n" + "              </void> \n" + "              <void property=\"tabAlias\"> \n" + "               <string>orc_people</string> \n" + "              </void> \n" + "              <void property=\"typeInfo\"> \n" + "               <object id=\"PrimitiveTypeInfo2\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + "                <void property=\"typeName\"> \n" + "                 <string>int</string> \n" + "                </void> \n" + "               </object> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "              <void property=\"typeInfo\"> \n" + "               <object idref=\"PrimitiveTypeInfo2\"/> \n" + "              </void> \n" + "              <void property=\"value\"> \n" + "               <int>12</int> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "           </object> \n" + "          </void> \n" + "          <void property=\"genericUDF\"> \n" + "           <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan\"/> \n" + "          </void> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo1\"/> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "       </object> \n" + "      </void> \n" + "      <void property=\"genericUDF\"> \n" + "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + "      </void> \n" + "      <void property=\"typeInfo\"> \n" + "       <object idref=\"PrimitiveTypeInfo1\"/> \n" + "      </void> \n" + "     </object> \n" + "    </void> \n" + "    <void method=\"add\"> \n" + "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "      <void 
property=\"children\"> \n" + "       <object class=\"java.util.ArrayList\"> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "          <void property=\"column\"> \n" + "           <string>id</string> \n" + "          </void> \n" + "          <void property=\"tabAlias\"> \n" + "           <string>orc_people</string> \n" + "          </void> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo2\"/> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo2\"/> \n" + "          </void> \n" + "          <void property=\"value\"> \n" + "           <int>4</int> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "       </object> \n" + "      </void> \n" + "      <void property=\"genericUDF\"> \n" + "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan\"/> \n" + "      </void> \n" + "      <void property=\"typeInfo\"> \n" + "       <object idref=\"PrimitiveTypeInfo1\"/> \n" + "      </void> \n" + "     </object> \n" + "    </void> \n" + "   </object> \n" + "  </void> \n" + "  <void property=\"genericUDF\"> \n" + "   <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr\"/> \n" + "  </void> \n" + "  <void property=\"typeInfo\"> \n" + "   <object idref=\"PrimitiveTypeInfo1\"/> \n" + "  </void> \n" + " </object> \n" + "</java> \n";
    SearchArgumentImpl sarg = (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr));
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals(4, leaves.size());
    String[] conditions = new String[] { "eq(first_name, null)", /* first_name is null  */
    "not(eq(first_name, Binary{\"sue\"}))", /* first_name <> 'sue' */
    "not(lt(id, 12))", /* id >= 12            */
    "lteq(id, 4)" /* id <= 4             */
    };
    MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 id;" + " required binary first_name; }");
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions);
    assertEquals(expected, p.toString());
    PredicateLeaf leaf = leaves.get(0);
    assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
    assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator());
    assertEquals("first_name", leaf.getColumnName());
    assertEquals(null, leaf.getLiteral());
    assertEquals(null, leaf.getLiteralList());
    leaf = leaves.get(1);
    assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
    assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
    assertEquals("first_name", leaf.getColumnName());
    assertEquals("sue", leaf.getLiteral());
    leaf = leaves.get(2);
    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
    assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
    assertEquals("id", leaf.getColumnName());
    assertEquals(12L, leaf.getLiteral());
    leaf = leaves.get(3);
    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
    assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
    assertEquals("id", leaf.getColumnName());
    assertEquals(4L, leaf.getLiteral());
    assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)", sarg.getExpression().toString());
    assertNoSharedNodes(sarg.getExpression(), Sets.<ExpressionTree>newIdentityHashSet());
    assertEquals(TruthValue.NO, sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, TruthValue.NO)));
    assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.YES, TruthValue.YES, TruthValue.YES, TruthValue.NO)));
    assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.NO, TruthValue.NO, TruthValue.YES, TruthValue.NO)));
    assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO, TruthValue.NO)));
    assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, TruthValue.YES)));
    assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NULL, TruthValue.YES, TruthValue.YES, TruthValue.NO)));
    assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES, TruthValue.NO)));
    assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NULL, TruthValue.NO)));
    assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES, TruthValue.NULL)));
    assertEquals(TruthValue.YES_NO, sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NO, TruthValue.YES, TruthValue.YES_NO)));
    assertEquals(TruthValue.NO_NULL, sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NULL, TruthValue.YES, TruthValue.NO_NULL)));
    assertEquals(TruthValue.YES_NULL, sarg.evaluate(values(TruthValue.YES_NULL, TruthValue.YES_NO_NULL, TruthValue.YES, TruthValue.NULL)));
    assertEquals(TruthValue.YES_NO_NULL, sarg.evaluate(values(TruthValue.NO_NULL, TruthValue.YES_NO_NULL, TruthValue.YES, TruthValue.NO)));
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)
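
For reference, the predicate string asserted above can also be written out by hand with parquet-mr's FilterApi. This is only an illustrative sketch (it is not part of the test); the column names and literals mirror the leaves listed in the conditions array.

import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.BinaryColumn;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;
import org.apache.parquet.io.api.Binary;

static FilterPredicate expectedExpression2Predicate() {
    BinaryColumn firstName = FilterApi.binaryColumn("first_name");
    IntColumn id = FilterApi.intColumn("id");
    // or(or(or(eq(first_name, null), not(eq(first_name, Binary{"sue"}))), not(lt(id, 12))), lteq(id, 4))
    return FilterApi.or(
        FilterApi.or(
            FilterApi.or(
                // first_name is null
                FilterApi.eq(firstName, null),
                // first_name <> 'sue'
                FilterApi.not(FilterApi.eq(firstName, Binary.fromString("sue")))),
            // id >= 12
            FilterApi.not(FilterApi.lt(id, 12))),
        // id <= 4
        FilterApi.ltEq(id, 4));
}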

Example 7 with Or

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project hive by apache.

the class TestConvertAstToSearchArg method testExpression4.

@Test
public void testExpression4() throws Exception {
    /* id <> 12 and
       first_name in ('john', 'sue') and
       id in (34,50) */
    String exprStr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?> \n" + "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "  <void property=\"children\"> \n" + "   <object class=\"java.util.ArrayList\"> \n" + "    <void method=\"add\"> \n" + "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "      <void property=\"children\"> \n" + "       <object class=\"java.util.ArrayList\"> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "          <void property=\"children\"> \n" + "           <object class=\"java.util.ArrayList\"> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "              <void property=\"column\"> \n" + "               <string>id</string> \n" + "              </void> \n" + "              <void property=\"tabAlias\"> \n" + "               <string>orc_people</string> \n" + "              </void> \n" + "              <void property=\"typeInfo\"> \n" + "               <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + "                <void property=\"typeName\"> \n" + "                 <string>int</string> \n" + "                </void> \n" + "               </object> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "              <void property=\"typeInfo\"> \n" + "               <object idref=\"PrimitiveTypeInfo0\"/> \n" + "              </void> \n" + "              <void property=\"value\"> \n" + "               <int>12</int> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "           </object> \n" + "          </void> \n" + "          <void property=\"genericUDF\"> \n" + "           <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual\"/> \n" + "          </void> \n" + "          <void property=\"typeInfo\"> \n" + "           <object id=\"PrimitiveTypeInfo1\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + "            <void property=\"typeName\"> \n" + "             <string>boolean</string> \n" + "            </void> \n" + "           </object> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "          <void property=\"children\"> \n" + "           <object class=\"java.util.ArrayList\"> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "              <void property=\"column\"> \n" + "               <string>first_name</string> \n" + "              </void> \n" + "              <void property=\"tabAlias\"> \n" + "               <string>orc_people</string> \n" + "              </void> \n" + "              <void property=\"typeInfo\"> \n" + "               <object id=\"PrimitiveTypeInfo2\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + "                <void property=\"typeName\"> \n" + "                 <string>string</string> \n" + "                </void> \n" + "               </object> \n" + 
"              </void> \n" + "             </object> \n" + "            </void> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "              <void property=\"typeInfo\"> \n" + "               <object idref=\"PrimitiveTypeInfo2\"/> \n" + "              </void> \n" + "              <void property=\"value\"> \n" + "               <string>john</string> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "            <void method=\"add\"> \n" + "             <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "              <void property=\"typeInfo\"> \n" + "               <object idref=\"PrimitiveTypeInfo2\"/> \n" + "              </void> \n" + "              <void property=\"value\"> \n" + "               <string>sue</string> \n" + "              </void> \n" + "             </object> \n" + "            </void> \n" + "           </object> \n" + "          </void> \n" + "          <void property=\"genericUDF\"> \n" + "           <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn\"/> \n" + "          </void> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo1\"/> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "       </object> \n" + "      </void> \n" + "      <void property=\"genericUDF\"> \n" + "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + "      </void> \n" + "      <void property=\"typeInfo\"> \n" + "       <object idref=\"PrimitiveTypeInfo1\"/> \n" + "      </void> \n" + "     </object> \n" + "    </void> \n" + "    <void method=\"add\"> \n" + "     <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + "      <void property=\"children\"> \n" + "       <object class=\"java.util.ArrayList\"> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + "          <void property=\"column\"> \n" + "           <string>id</string> \n" + "          </void> \n" + "          <void property=\"tabAlias\"> \n" + "           <string>orc_people</string> \n" + "          </void> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo0\"/> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo0\"/> \n" + "          </void> \n" + "          <void property=\"value\"> \n" + "           <int>34</int> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "        <void method=\"add\"> \n" + "         <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + "          <void property=\"typeInfo\"> \n" + "           <object idref=\"PrimitiveTypeInfo0\"/> \n" + "          </void> \n" + "          <void property=\"value\"> \n" + "           <int>50</int> \n" + "          </void> \n" + "         </object> \n" + "        </void> \n" + "       </object> \n" + "      </void> \n" + "      <void property=\"genericUDF\"> \n" + "       <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn\"/> \n" + "      </void> \n" + "      <void property=\"typeInfo\"> \n" + "       <object idref=\"PrimitiveTypeInfo1\"/> 
\n" + "      </void> \n" + "     </object> \n" + "    </void> \n" + "   </object> \n" + "  </void> \n" + "  <void property=\"genericUDF\"> \n" + "   <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + "  </void> \n" + "  <void property=\"typeInfo\"> \n" + "   <object idref=\"PrimitiveTypeInfo1\"/> \n" + "  </void> \n" + " </object> \n" + "</java> \n" + "\n";
    SearchArgumentImpl sarg = (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr));
    List<PredicateLeaf> leaves = sarg.getLeaves();
    assertEquals(3, leaves.size());
    String[] conditions = new String[] { "not(eq(id, 12))", /* id <> 12 */
    "or(eq(first_name, Binary{\"john\"}), eq(first_name, Binary{\"sue\"}))", /* first_name in
      ('john', 'sue') */
    "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */
    };
    MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 id;" + " required binary first_name; }");
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions);
    assertEquals(expected, p.toString());
    PredicateLeaf leaf = leaves.get(0);
    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
    assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
    assertEquals("id", leaf.getColumnName());
    assertEquals(12L, leaf.getLiteral());
    leaf = leaves.get(1);
    assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
    assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
    assertEquals("first_name", leaf.getColumnName());
    assertEquals("john", leaf.getLiteralList().get(0));
    assertEquals("sue", leaf.getLiteralList().get(1));
    leaf = leaves.get(2);
    assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
    assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
    assertEquals("id", leaf.getColumnName());
    assertEquals(34L, leaf.getLiteralList().get(0));
    assertEquals(50L, leaf.getLiteralList().get(1));
    assertEquals("(and (not leaf-0) leaf-1 leaf-2)", sarg.getExpression().toString());
    assertNoSharedNodes(sarg.getExpression(), Sets.<ExpressionTree>newIdentityHashSet());
    assertEquals(TruthValue.YES, sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.YES)));
    assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NULL, TruthValue.YES, TruthValue.YES)));
    assertEquals(TruthValue.NULL, sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES)));
    assertEquals(TruthValue.NO, sarg.evaluate(values(TruthValue.YES, TruthValue.YES, TruthValue.YES)));
    assertEquals(TruthValue.NO, sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO)));
    assertEquals(TruthValue.NO, sarg.evaluate(values(TruthValue.NO, TruthValue.YES_NULL, TruthValue.NO)));
    assertEquals(TruthValue.NO_NULL, sarg.evaluate(values(TruthValue.NO, TruthValue.NULL, TruthValue.YES_NO_NULL)));
    assertEquals(TruthValue.NO_NULL, sarg.evaluate(values(TruthValue.NO, TruthValue.YES, TruthValue.NO_NULL)));
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)
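
A rough equivalent, assuming the type-aware builder methods of Hive's SearchArgumentFactory (this sketch is not part of the test, which builds the argument from the serialized AST instead):

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

SearchArgument handBuilt = SearchArgumentFactory.newBuilder()
    .startAnd()
      // id <> 12
      .startNot().equals("id", PredicateLeaf.Type.LONG, 12L).end()
      // first_name in ('john', 'sue')
      .in("first_name", PredicateLeaf.Type.STRING, "john", "sue")
      // id in (34, 50)
      .in("id", PredicateLeaf.Type.LONG, 34L, 50L)
    .end()
    .build();

This would yield the same "(and (not leaf-0) leaf-1 leaf-2)" structure asserted above.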

Example 8 with Or

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project hive by apache.

the class FilterPredicateLeafBuilder method buildPredicate.

/**
 * Builds a filter predicate from multiple constant literals.
 *
 * @param op         the comparison operator, either IN or BETWEEN
 * @param literals   the constant values to compare the column against
 * @param columnName the name of the column being filtered
 * @return the combined FilterPredicate
 */
public FilterPredicate buildPredicate(PredicateLeaf.Operator op, List<Object> literals, String columnName) throws Exception {
    FilterPredicate result = null;
    switch(op) {
        case IN:
            for (Object literal : literals) {
                if (result == null) {
                    result = buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName);
                } else {
                    result = or(result, buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName));
                }
            }
            return result;
        case BETWEEN:
            if (literals.size() != 2) {
                throw new RuntimeException("Not able to build 'between' operation filter with " + literals + " which needs two literals");
            }
            Object min = literals.get(0);
            Object max = literals.get(1);
            FilterPredicate lt = not(buildPredict(PredicateLeaf.Operator.LESS_THAN, min, columnName));
            FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS, max, columnName);
            result = FilterApi.and(gt, lt);
            return result;
        default:
            throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
    }
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)
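
To make the two branches concrete, here is a small hand-written sketch (the column name and bounds are made up for illustration) of the predicates this builder produces through parquet-mr's FilterApi:

import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators.IntColumn;

IntColumn id = FilterApi.intColumn("id");

// IN: id in (34, 50)  ==>  or(eq(id, 34), eq(id, 50))
FilterPredicate in = FilterApi.or(FilterApi.eq(id, 34), FilterApi.eq(id, 50));

// BETWEEN: id between 5 and 10  ==>  and(lteq(id, 10), not(lt(id, 5)))
FilterPredicate between = FilterApi.and(
    FilterApi.ltEq(id, 10),
    FilterApi.not(FilterApi.lt(id, 5)));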

Example 9 with Or

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project parquet-mr by apache.

the class ParquetLoader method setInput.

private void setInput(String location, Job job) throws IOException {
    this.setLocationHasBeenCalled = true;
    this.location = location;
    setInputPaths(job, location);
    // This is prior to load because the initial value comes from the constructor
    // not file metadata or pig framework and would get overwritten in initSchema().
    if (UDFContext.getUDFContext().isFrontend()) {
        storeInUDFContext(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));
    }
    schema = PigSchemaConverter.parsePigSchema(getPropertyFromUDFContext(PARQUET_PIG_SCHEMA));
    requiredFieldList = PigSchemaConverter.deserializeRequiredFieldList(getPropertyFromUDFContext(PARQUET_PIG_REQUIRED_FIELDS));
    columnIndexAccess = Boolean.parseBoolean(getPropertyFromUDFContext(PARQUET_COLUMN_INDEX_ACCESS));
    initSchema(job);
    if (UDFContext.getUDFContext().isFrontend()) {
        // Setting for task-side loading via initSchema()
        storeInUDFContext(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
        storeInUDFContext(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
    }
    // Used by task-side loader via TupleReadSupport
    getConfiguration(job).set(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
    getConfiguration(job).set(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
    getConfiguration(job).set(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));
    FilterPredicate filterPredicate = (FilterPredicate) getFromUDFContext(ParquetInputFormat.FILTER_PREDICATE);
    if (filterPredicate != null) {
        ParquetInputFormat.setFilterPredicate(getConfiguration(job), filterPredicate);
    }
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)
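
Outside of Pig, the same push-down hook can be set directly on a MapReduce job. A minimal sketch (the column name and threshold are hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.hadoop.ParquetInputFormat;

Job job = Job.getInstance(new Configuration());
// Push a record-level filter (id >= 12) into the job configuration;
// ParquetInputFormat applies it when reading.
ParquetInputFormat.setFilterPredicate(
    job.getConfiguration(),
    FilterApi.gtEq(FilterApi.intColumn("id"), 12));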

Example 10 with Or

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or in project parquet-mr by apache.

the class TestIncrementallyUpdatedFilterPredicateEvaluator method testShortCircuit.

@Test
public void testShortCircuit() {
    ValueInspector neverCalled = new ValueInspector() {

        @Override
        public boolean accept(Visitor visitor) {
            throw new ShortCircuitException();
        }
    };
    try {
        evaluate(neverCalled);
        fail("this should throw");
    } catch (ShortCircuitException e) {
    // 
    }
    // T || X should evaluate to true without inspecting X
    ValueInspector v = intIsEven();
    v.update(10);
    IncrementallyUpdatedFilterPredicate or = new Or(v, neverCalled);
    assertTrue(evaluate(or));
    v.reset();
    // F && X should evaluate to false without inspecting X
    v.update(11);
    IncrementallyUpdatedFilterPredicate and = new And(v, neverCalled);
    assertFalse(evaluate(and));
    v.reset();
}
Also used : ValueInspector(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector) Or(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or) And(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And) Test(org.junit.Test)
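
The intIsEven() helper is not shown in this snippet; it presumably builds a ValueInspector along these lines (a sketch of the idea, not the test's actual code; the test lives in the same org.apache.parquet.filter2.recordlevel package as ValueInspector):

import org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector;

static ValueInspector intIsEven() {
    return new ValueInspector() {
        @Override
        public void update(int value) {
            // Record a known result for this value; Or and And can then
            // short-circuit without visiting their other operand.
            setResult(value % 2 == 0);
        }
    };
}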

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate) 17
Test (org.junit.Test) 17
MessageType (org.apache.parquet.schema.MessageType) 9
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument) 4
BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn) 3
Configuration (org.apache.hadoop.conf.Configuration) 2
IntColumn (org.apache.parquet.filter2.predicate.Operators.IntColumn) 2
And (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And) 2
Or (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or) 2
ValueInspector (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector) 2
ByteArrayInputStream (java.io.ByteArrayInputStream) 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 1
ObjectInputStream (java.io.ObjectInputStream) 1
ObjectOutputStream (java.io.ObjectOutputStream) 1
ArrayList (java.util.ArrayList) 1
Path (org.apache.hadoop.fs.Path) 1
DataWritableReadSupport (org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport) 1
FileSplit (org.apache.hadoop.mapred.FileSplit) 1
Job (org.apache.hadoop.mapreduce.Job) 1
Group (org.apache.parquet.example.data.Group) 1