use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.
the class TestConvertAstToSearchArg method testExpression3.
/**
 * Converts a serialized five-way AND expression into a search argument and checks the result.
 *
 * <p>The assertions below expect exactly three predicate leaves (BETWEEN on {@code id}, EQUALS on
 * {@code first_name}, EQUALS on {@code last_name}); neither {@code substr(...)} comparison from
 * the input expression appears among them. The test also checks the string form of the Parquet
 * {@code FilterPredicate} built from the search argument against a three-column schema.
 */
@Test
public void testExpression3() throws Exception {
/* (id between 23 and 45) and
first_name = 'alan' and
substr('xxxxx', 3) == first_name and
'smith' = last_name and
substr(first_name, 3) == 'yyy' */
// XML form of the expression tree described above (the document header names java.beans.XMLDecoder).
// NOTE(review): getFuncDesc is defined outside this method; presumably it decodes this XML into
// the expression descriptor handed to ConvertAstToSearchArg.create -- confirm against the helper.
String exprStr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?> \n" + "<java version=\"1.6.0_31\" class=\"java.beans.XMLDecoder\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object id=\"PrimitiveTypeInfo0\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + " <void property=\"typeName\"> \n" + " <string>boolean</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <boolean>false</boolean> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>id</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object id=\"PrimitiveTypeInfo1\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + 
" <void property=\"typeName\"> \n" + " <string>int</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>23</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>45</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>first_name</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object id=\"PrimitiveTypeInfo2\" class=\"org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo\"> \n" + " <void property=\"typeName\"> \n" + " <string>string</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void 
property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <string>alan</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <string>xxxxx</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <int>3</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge\"> 
\n" + " <void property=\"udfClassName\"> \n" + " <string>org.apache.hadoop.hive.ql.udf.UDFSubstr</string> \n" + " </void> \n" + " <void property=\"udfName\"> \n" + " <string>substr</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>first_name</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <string>smith</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object 
class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>last_name</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc\"> \n" + " <void property=\"children\"> \n" + " <object class=\"java.util.ArrayList\"> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc\"> \n" + " <void property=\"column\"> \n" + " <string>first_name</string> \n" + " </void> \n" + " <void property=\"tabAlias\"> \n" + " <string>orc_people</string> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo1\"/> \n" + " </void> 
\n" + " <void property=\"value\"> \n" + " <int>3</int> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge\"> \n" + " <void property=\"udfClassName\"> \n" + " <string>org.apache.hadoop.hive.ql.udf.UDFSubstr</string> \n" + " </void> \n" + " <void property=\"udfName\"> \n" + " <string>substr</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void method=\"add\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc\"> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo2\"/> \n" + " </void> \n" + " <void property=\"value\"> \n" + " <string>yyy</string> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " </object> \n" + " </void> \n" + " <void property=\"genericUDF\"> \n" + " <object class=\"org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd\"/> \n" + " </void> \n" + " <void property=\"typeInfo\"> \n" + " <object idref=\"PrimitiveTypeInfo0\"/> \n" + " </void> \n" + " </object> \n" + "</java> \n";
// Build the search argument from the decoded expression.
SearchArgumentImpl sarg = (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr));
List<PredicateLeaf> leaves = sarg.getLeaves();
// Five comparisons go in, three leaves are expected out (no leaf for either substr comparison).
assertEquals(3, leaves.size());
// Expected Parquet operands: the BETWEEN leaf shows up as two terms (upper bound, negated lower bound).
String[] conditions = new String[] { "lteq(id, 45)", /* id between 23 and 45 */
"not(lt(id, 23))", /* id between 23 and 45 */
"eq(first_name, Binary{\"alan\"})", /* first_name = 'alan' */
"eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */
};
// Parquet schema with the three columns referenced by the expected leaves.
MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 id;" + " required binary first_name; required binary last_name;}");
FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
// The four operands are expected to be combined as a left-nested chain of binary and(...) nodes.
String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions);
assertEquals(expected, p.toString());
// leaf-0: id BETWEEN 23 AND 45; the bounds are read from the literal list and getLiteral() is null.
PredicateLeaf leaf = leaves.get(0);
assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator());
assertEquals("id", leaf.getColumnName());
assertEquals(null, leaf.getLiteral());
assertEquals(23L, leaf.getLiteralList().get(0));
assertEquals(45L, leaf.getLiteralList().get(1));
// leaf-1: first_name = 'alan'
leaf = leaves.get(1);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("first_name", leaf.getColumnName());
assertEquals("alan", leaf.getLiteral());
// leaf-2: last_name = 'smith' (written as 'smith' = last_name in the input expression)
leaf = leaves.get(2);
assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
assertEquals("last_name", leaf.getColumnName());
assertEquals("smith", leaf.getLiteral());
// The expression tree is expected to be a single flat AND over the three leaves.
assertEquals("(and leaf-0 leaf-1 leaf-2)", sarg.getExpression().toString());
assertNoSharedNodes(sarg.getExpression(), Sets.<ExpressionTree>newIdentityHashSet());
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.
the class ParquetLoader method setInput.
/**
 * Records the input location on this loader and on the job, then synchronises the Pig schema,
 * the required-field list and the column-index-access flag between the UDF context and the
 * job configuration. If a filter predicate is present in the UDF context it is also installed
 * on the job configuration.
 *
 * @param location input location, forwarded to {@code setInputPaths}
 * @param job job whose configuration receives the settings
 * @throws IOException declared by the signature for the calls made here
 */
private void setInput(String location, Job job) throws IOException {
  this.setLocationHasBeenCalled = true;
  this.location = location;
  setInputPaths(job, location);

  // On the front end, publish the current columnIndexAccess value first: the field is
  // re-read from the UDF context a few lines below, so it has to be stored before that.
  if (UDFContext.getUDFContext().isFrontend()) {
    storeInUDFContext(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));
  }

  // Refresh the loader state from whatever the UDF context currently holds.
  String storedSchema = getPropertyFromUDFContext(PARQUET_PIG_SCHEMA);
  schema = PigSchemaConverter.parsePigSchema(storedSchema);
  String storedRequiredFields = getPropertyFromUDFContext(PARQUET_PIG_REQUIRED_FIELDS);
  requiredFieldList = PigSchemaConverter.deserializeRequiredFieldList(storedRequiredFields);
  String storedColumnIndexAccess = getPropertyFromUDFContext(PARQUET_COLUMN_INDEX_ACCESS);
  columnIndexAccess = Boolean.parseBoolean(storedColumnIndexAccess);

  initSchema(job);

  if (UDFContext.getUDFContext().isFrontend()) {
    // Kept in the UDF context for task-side loading via initSchema().
    storeInUDFContext(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
    storeInUDFContext(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
  }

  // Mirrored into the job configuration for the task-side loader (TupleReadSupport).
  getConfiguration(job).set(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
  getConfiguration(job).set(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
  getConfiguration(job).set(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));

  FilterPredicate filterPredicate = (FilterPredicate) getFromUDFContext(ParquetInputFormat.FILTER_PREDICATE);
  if (filterPredicate == null) {
    // Nothing to install on the job.
    return;
  }
  ParquetInputFormat.setFilterPredicate(getConfiguration(job), filterPredicate);
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.
the class TestIncrementallyUpdatedFilterPredicateEvaluator method testValueInspector.
/**
 * Walks two value inspectors (one from {@code intIsEven()}, one from {@code intIsNull()})
 * through the states "updated with a value", "updated with null" and "reset", asserting the
 * result of {@code evaluate} in each state.
 */
@Test
public void testValueInspector() {
  ValueInspector evenInspector = intIsEven();

  // Updated with a value that fails the criterion -> false.
  evenInspector.update(11);
  assertFalse(evaluate(evenInspector));
  evenInspector.reset();

  // Updated with a value that satisfies the criterion -> true.
  evenInspector.update(12);
  assertTrue(evaluate(evenInspector));
  evenInspector.reset();

  // Updated with null; this inspector treats null as false.
  evenInspector.updateNull();
  assertFalse(evaluate(evenInspector));
  evenInspector.reset();

  ValueInspector nullInspector = intIsNull();

  // Updated with a non-null value; the is-null criterion is not met -> false.
  nullInspector.update(10);
  assertFalse(evaluate(nullInspector));
  nullInspector.reset();

  // Updated with null; this inspector treats null as true.
  nullInspector.updateNull();
  assertTrue(evaluate(nullInspector));
  nullInspector.reset();

  // Not updated since the last reset: the null-is-false inspector evaluates to false.
  evenInspector.reset();
  assertFalse(evaluate(evenInspector));

  // Not updated since the last reset: the null-is-true inspector evaluates to true.
  nullInspector.reset();
  assertTrue(evaluate(nullInspector));
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.
the class TestIncrementallyUpdatedFilterPredicateEvaluator method testShortCircuit.
/**
 * Checks that evaluating {@code Or} / {@code And} does not visit the right-hand operand when
 * the left-hand operand already decides the result. The right-hand operand is an inspector
 * whose {@code accept} always throws, so any visit would surface as an exception.
 */
@Test
public void testShortCircuit() {
  ValueInspector explosive = new ValueInspector() {
    @Override
    public boolean accept(Visitor visitor) {
      throw new ShortCircuitException();
    }
  };

  // Sanity check: evaluating the throwing inspector directly must raise the exception.
  boolean threw = false;
  try {
    evaluate(explosive);
  } catch (ShortCircuitException expected) {
    threw = true;
  }
  assertTrue("this should throw", threw);

  ValueInspector evenInspector = intIsEven();

  // T || X should evaluate to true without inspecting X
  evenInspector.update(10);
  assertTrue(evaluate(new Or(evenInspector, explosive)));
  evenInspector.reset();

  // F && X should evaluate to false without inspecting X
  evenInspector.update(11);
  assertFalse(evaluate(new And(evenInspector, explosive)));
  evenInspector.reset();
}
use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.
the class TestIncrementallyUpdatedFilterPredicateResetter method testReset.
/**
 * Builds the predicate {@code intIsNull OR (intIsEven AND doubleMoreThan10)} and checks that
 * {@code IncrementallyUpdatedFilterPredicateResetter.reset} leaves every inspector in the tree
 * reporting {@code isKnown() == false}, both when all of them were updated and when only one was.
 */
@Test
public void testReset() {
  ValueInspector nullInspector = intIsNull();
  ValueInspector evenInspector = intIsEven();
  ValueInspector bigDoubleInspector = doubleMoreThan10();

  IncrementallyUpdatedFilterPredicate evenAndBig = new And(evenInspector, bigDoubleInspector);
  IncrementallyUpdatedFilterPredicate tree = new Or(nullInspector, evenAndBig);

  // Update every inspector; all of them must report a known value.
  nullInspector.updateNull();
  evenInspector.update(11);
  bigDoubleInspector.update(20.0D);
  assertTrue(nullInspector.isKnown());
  assertTrue(evenInspector.isKnown());
  assertTrue(bigDoubleInspector.isKnown());

  // Resetting through the root must reach all three inspectors.
  IncrementallyUpdatedFilterPredicateResetter.reset(tree);
  assertFalse(nullInspector.isKnown());
  assertFalse(evenInspector.isKnown());
  assertFalse(bigDoubleInspector.isKnown());

  // Update just one inspector; the other two stay unknown.
  nullInspector.updateNull();
  assertTrue(nullInspector.isKnown());
  assertFalse(evenInspector.isKnown());
  assertFalse(bigDoubleInspector.isKnown());

  // A second reset clears the one that was updated and leaves the others unknown.
  IncrementallyUpdatedFilterPredicateResetter.reset(tree);
  assertFalse(nullInspector.isKnown());
  assertFalse(evenInspector.isKnown());
  assertFalse(bigDoubleInspector.isKnown());
}
Aggregations