Search in sources :

Example 1 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

the class TestParquetRecordReaderWrapper method testBuilder.

/**
 * Builds a search argument consisting of a negated OR over four leaves
 * (is-null, between, in, null-safe equals), converts it against a small
 * Parquet schema, and compares the predicate's string form with the
 * expected text.
 */
@Test
public void testBuilder() throws Exception {
    SearchArgument searchArgument = SearchArgumentFactory.newBuilder()
        .startNot()
        .startOr()
        .isNull("x", PredicateLeaf.Type.LONG)
        .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
        .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
        .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
        .end()
        .end()
        .build();

    // Schema declaring every column referenced by the search argument.
    MessageType fileSchema = MessageTypeParser.parseMessageType(
        "message test {" + " optional int32 x; required int32 y; required int32 z;" + " optional binary a;}");

    FilterPredicate converted = ParquetFilterPredicateConverter.toFilterPredicate(searchArgument, fileSchema);

    String expectedText = "and(and(and(not(eq(x, null)), not(and(lteq(y, 20), not(lt(y, 10))))), not(or(or(eq(z, 1), " + "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
    assertEquals(expectedText, converted.toString());
}
Also used : SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 2 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project hive by apache.

the class TestParquetRecordReaderWrapper method testBuilderFloat.

/**
 * Builds an AND of five leaves (two long comparisons, one padded-char
 * comparison and two float equalities), converts it against a matching
 * Parquet schema, and compares the predicate's string form with the
 * expected text.
 */
@Test
public void testBuilderFloat() throws Exception {
    // Double.valueOf replaces the deprecated Double(double) constructor; the
    // resulting boxed value is equal, so the built search argument is unchanged.
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("x", PredicateLeaf.Type.LONG, 22L)
        .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
        .lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString())
        .equals("z", PredicateLeaf.Type.FLOAT, Double.valueOf(0.22))
        .equals("z1", PredicateLeaf.Type.FLOAT, Double.valueOf(0.22))
        .end()
        .build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required int32 x; required int32 x1;" + " required binary y; required float z; required float z1;}");
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    // The expected text carries the "hi" literal with trailing spaces, as produced
    // by the HiveChar value built above.
    String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," + " lteq(y, Binary{\"hi        \"})), eq(z, " + "0.22)), eq(z1, 0.22))";
    assertEquals(expected, p.toString());
}
Also used : HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 3 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.

the class ParquetLoader method setInput.

/**
 * Records the input location on this loader and on the job, then exchanges
 * schema-related settings between the UDF context, this loader's fields and
 * the job configuration.
 *
 * @param location the input location, passed on to {@code setInputPaths}
 * @param job the job whose input paths and configuration are updated
 * @throws IOException declared by this method; NOTE(review): presumably raised
 *         by {@code setInputPaths} or {@code initSchema} - confirm
 */
private void setInput(String location, Job job) throws IOException {
    this.setLocationHasBeenCalled = true;
    this.location = location;
    setInputPaths(job, location);
    // NOTE(review): the first part of this comment appears to be missing. What
    // remains says the value is not taken from file metadata or the Pig
    // framework and would get overwritten in initSchema(); presumably that is
    // why the flag is stored before it is read back below - confirm.
    if (UDFContext.getUDFContext().isFrontend()) {
        storeInUDFContext(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));
    }
    // Load schema, required fields and the column-index flag from the UDF context.
    schema = PigSchemaConverter.parsePigSchema(getPropertyFromUDFContext(PARQUET_PIG_SCHEMA));
    requiredFieldList = PigSchemaConverter.deserializeRequiredFieldList(getPropertyFromUDFContext(PARQUET_PIG_REQUIRED_FIELDS));
    columnIndexAccess = Boolean.parseBoolean(getPropertyFromUDFContext(PARQUET_COLUMN_INDEX_ACCESS));
    initSchema(job);
    if (UDFContext.getUDFContext().isFrontend()) {
        // Setting for task-side loading via initSchema()
        storeInUDFContext(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
        storeInUDFContext(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
    }
    // Used by task-side loader via TupleReadSupport
    getConfiguration(job).set(PARQUET_PIG_SCHEMA, pigSchemaToString(schema));
    getConfiguration(job).set(PARQUET_PIG_REQUIRED_FIELDS, serializeRequiredFieldList(requiredFieldList));
    getConfiguration(job).set(PARQUET_COLUMN_INDEX_ACCESS, Boolean.toString(columnIndexAccess));
    // Forward a filter predicate to the input format only when one is present
    // in the UDF context.
    FilterPredicate filterPredicate = (FilterPredicate) getFromUDFContext(ParquetInputFormat.FILTER_PREDICATE);
    if (filterPredicate != null) {
        ParquetInputFormat.setFilterPredicate(getConfiguration(job), filterPredicate);
    }
}
Also used : FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)

Example 4 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.

the class TestIncrementallyUpdatedFilterPredicateEvaluator method testValueInspector.

/**
 * Walks two value inspectors (one built by {@code intIsEven()}, one by
 * {@code intIsNull()}) through updated, null-updated and reset states and
 * checks what {@code evaluate} returns for each.
 */
@Test
public void testValueInspector() {
    ValueInspector evenInspector = intIsEven();

    // updated with 11: evaluates to false
    evenInspector.update(11);
    assertFalse(evaluate(evenInspector));
    evenInspector.reset();

    // updated with 12: evaluates to true
    evenInspector.update(12);
    assertTrue(evaluate(evenInspector));
    evenInspector.reset();

    // updated with null: evaluates to false for this inspector
    evenInspector.updateNull();
    assertFalse(evaluate(evenInspector));
    evenInspector.reset();

    ValueInspector nullInspector = intIsNull();

    // updated with a non-null value: evaluates to false
    nullInspector.update(10);
    assertFalse(evaluate(nullInspector));
    nullInspector.reset();

    // updated with null: evaluates to true for this inspector
    nullInspector.updateNull();
    assertTrue(evaluate(nullInspector));
    nullInspector.reset();

    // no update since reset: evaluates to false
    evenInspector.reset();
    assertFalse(evaluate(evenInspector));

    // no update since reset: evaluates to true
    nullInspector.reset();
    assertTrue(evaluate(nullInspector));
}
Also used : ValueInspector(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector) Test(org.junit.Test)

Example 5 with And

use of org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And in project parquet-mr by apache.

the class TestIncrementallyUpdatedFilterPredicateEvaluator method testShortCircuit.

/**
 * Checks that evaluating an {@code Or} whose left side is true, or an
 * {@code And} whose left side is false, does not visit the right-hand
 * operand. The right-hand operand throws if it is ever visited.
 */
@Test
public void testShortCircuit() {
    // Inspector that throws as soon as a visitor reaches it.
    ValueInspector explodingInspector = new ValueInspector() {
        @Override
        public boolean accept(Visitor visitor) {
            throw new ShortCircuitException();
        }
    };

    // Sanity check: evaluating the throwing inspector directly must throw.
    try {
        evaluate(explodingInspector);
        fail("this should throw");
    } catch (ShortCircuitException expected) {
        // intentionally empty: the exception is the outcome under test
    }

    ValueInspector evenInspector = intIsEven();

    // T || X should evaluate to true without inspecting X
    evenInspector.update(10);
    IncrementallyUpdatedFilterPredicate shortCircuitedOr = new Or(evenInspector, explodingInspector);
    assertTrue(evaluate(shortCircuitedOr));
    evenInspector.reset();

    // F && X should evaluate to false without inspecting X
    evenInspector.update(11);
    IncrementallyUpdatedFilterPredicate shortCircuitedAnd = new And(evenInspector, explodingInspector);
    assertFalse(evaluate(shortCircuitedAnd));
    evenInspector.reset();
}
Also used : ValueInspector(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector) Or(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or) And(org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And) Test(org.junit.Test)

Aggregations

FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate) 45, Test (org.junit.Test) 31, HashMap (java.util.HashMap) 22, MessageType (org.apache.parquet.schema.MessageType) 22, TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 20, SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument) 15, List (java.util.List) 6, HiveChar (org.apache.hadoop.hive.common.type.HiveChar) 6, ArrayList (java.util.ArrayList) 5, HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar) 4, ValueInspector (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.ValueInspector) 4, Path (org.apache.hadoop.fs.Path) 3, BinaryColumn (org.apache.parquet.filter2.predicate.Operators.BinaryColumn) 2, And (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.And) 2, Or (org.apache.parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate.Or) 2, Test (org.junit.jupiter.api.Test) 2, Pair (uk.gov.gchq.gaffer.commonutil.pair.Pair) 2, ViewElementDefinition (uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition) 2, GetElements (uk.gov.gchq.gaffer.operation.impl.get.GetElements) 2, ParquetStore (uk.gov.gchq.gaffer.parquetstore.ParquetStore) 2