Example 1 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From the class TestParquetRecordReaderWrapper, method testBuilder:

@Test
public void testBuilder() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startNot()
            .startOr()
                .isNull("x", PredicateLeaf.Type.LONG)
                .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
                .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
                .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
            .end()
        .end()
        .build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {" + " optional int32 x; required int32 y; required int32 z;" + " optional binary a;}");
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    String expected = "and(and(and(not(eq(x, null)), not(and(lteq(y, 20), not(lt(y, 10))))), not(or(or(eq(z, 1), " + "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
    assertEquals(expected, p.toString());
}
Also used : SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)
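
For orientation, here is a minimal sketch of the same conversion pattern reduced to a single predicate. It only uses APIs demonstrated in the test above; the column name, schema, and the rendering mentioned in the comment are illustrative, not taken from the Hive test suite:

SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd()
        .lessThan("x", PredicateLeaf.Type.LONG, 10L)
    .end()
    .build();
MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 x; }");
// By analogy with the expected strings in the test above, this should render as
// something like lt(x, 10).
FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);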

Example 2 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From the class TestParquetRecordReaderWrapper, method testBuilderFloat:

@Test
public void testBuilderFloat() throws Exception {
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
            .lessThan("x", PredicateLeaf.Type.LONG, 22L)
            .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
            .lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString())
            .equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22))
            .equals("z1", PredicateLeaf.Type.FLOAT, new Double(0.22))
        .end()
        .build();
    MessageType schema = MessageTypeParser.parseMessageType("message test {" + " required int32 x; required int32 x1;" + " required binary y; required float z; required float z1;}");
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," + " lteq(y, Binary{\"hi        \"})), eq(z, " + "0.22)), eq(z1, 0.22))";
    assertEquals(expected, p.toString());
}
Also used : HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)
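
The padded literal in the expected string comes from HiveChar: a char(10) value is space-padded to its declared length, so new HiveChar("hi", 10).toString() yields "hi" followed by eight spaces. A small illustrative check, not part of the test:

HiveChar c = new HiveChar("hi", 10);
// Prints [hi        ] : the value is padded out to length 10, which is why the
// predicate above compares y against Binary{"hi        "}.
System.out.println("[" + c.toString() + "]");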

Example 3 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From the class ExternalCache, method getSerializedSargForMetastore:

private ByteBuffer getSerializedSargForMetastore(boolean isOriginal) {
    if (sarg == null)
        return null;
    ByteBuffer serializedSarg = isOriginal ? sargIsOriginal : sargNotIsOriginal;
    if (serializedSarg != null)
        return serializedSarg;
    SearchArgument sarg2 = sarg;
    Kryo kryo = SerializationUtilities.borrowKryo();
    try {
        if ((isOriginal ? sargNotIsOriginal : sargIsOriginal) == null) {
            // Copy before translating column indexes, since the original sarg is still
            // needed to build the serialized form for the other isOriginal case.
            sarg2 = kryo.copy(sarg2);
        }
        translateSargToTableColIndexes(sarg2, conf, OrcInputFormat.getRootColumn(isOriginal));
        ExternalCache.Baos baos = new Baos();
        Output output = new Output(baos);
        kryo.writeObject(output, sarg2);
        output.flush();
        serializedSarg = baos.get();
        if (isOriginal) {
            sargIsOriginal = serializedSarg;
        } else {
            sargNotIsOriginal = serializedSarg;
        }
    } finally {
        SerializationUtilities.releaseKryo(kryo);
    }
    return serializedSarg;
}
Also used : Output(com.esotericsoftware.kryo.io.Output) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) ByteBuffer(java.nio.ByteBuffer) Kryo(com.esotericsoftware.kryo.Kryo)
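
A hedged sketch of the reverse direction, reading such a buffer back with Kryo. The concrete class SearchArgumentImpl, the serializedBytes variable, and the use of com.esotericsoftware.kryo.io.Input are assumptions for illustration; they are not shown in the method above:

Kryo kryo = SerializationUtilities.borrowKryo();
try {
    // serializedBytes is assumed to be the byte[] behind the ByteBuffer produced above.
    Input input = new Input(serializedBytes);
    SearchArgument restored = kryo.readObject(input, SearchArgumentImpl.class);
    input.close();
    // restored can now be translated or pushed down again.
} finally {
    SerializationUtilities.releaseKryo(kryo);
}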

Example 4 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From the class TestOrcFile, method testPredicatePushdown:

@Test
public void testPredicatePushdown() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 400000L, CompressionKind.NONE, 500, 1000);
    for (int i = 0; i < 3500; ++i) {
        writer.addRow(new InnerStruct(i * 300, Integer.toHexString(10 * i)));
    }
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    assertEquals(3500, reader.getNumberOfRows());
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
            .startNot()
                .lessThan("int1", PredicateLeaf.Type.LONG, 300000L)
            .end()
            .lessThan("int1", PredicateLeaf.Type.LONG, 600000L)
        .end()
        .build();
    RecordReader rows = reader.rowsOptions(new Reader.Options()
        .range(0L, Long.MAX_VALUE)
        .include(new boolean[] { true, true, true })
        .searchArgument(sarg, new String[] { null, "int1", "string1" }));
    assertEquals(0L, rows.getRowNumber());
    OrcStruct row = null;
    for (int i = 1000; i < 2000; ++i) {
        assertTrue(rows.hasNext());
        assertEquals(i, rows.getRowNumber());
        row = (OrcStruct) rows.next(row);
        assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(Integer.toHexString(10 * i), row.getFieldValue(1).toString());
    }
    assertTrue(!rows.hasNext());
    assertEquals(3500, rows.getRowNumber());
    // look through the file with no rows selected
    sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
            .lessThan("int1", PredicateLeaf.Type.LONG, 0L)
        .end()
        .build();
    rows = reader.rowsOptions(new Reader.Options()
        .range(0L, Long.MAX_VALUE)
        .include(new boolean[] { true, true, true })
        .searchArgument(sarg, new String[] { null, "int1", "string1" }));
    assertTrue(!rows.hasNext());
    // select first 100 and last 100 rows
    sarg = SearchArgumentFactory.newBuilder()
        .startOr()
            .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 100)
            .startNot()
                .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 3400)
            .end()
        .end()
        .build();
    rows = reader.rowsOptions(new Reader.Options()
        .range(0L, Long.MAX_VALUE)
        .include(new boolean[] { true, true, true })
        .searchArgument(sarg, new String[] { null, "int1", "string1" }));
    row = null;
    for (int i = 0; i < 1000; ++i) {
        assertTrue(rows.hasNext());
        assertEquals(i, rows.getRowNumber());
        row = (OrcStruct) rows.next(row);
        assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(Integer.toHexString(10 * i), row.getFieldValue(1).toString());
    }
    for (int i = 3000; i < 3500; ++i) {
        assertTrue(rows.hasNext());
        assertEquals(i, rows.getRowNumber());
        row = (OrcStruct) rows.next(row);
        assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(Integer.toHexString(10 * i), row.getFieldValue(1).toString());
    }
    assertTrue(!rows.hasNext());
    assertEquals(3500, rows.getRowNumber());
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) Test(org.junit.Test)
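
The selected row ranges follow from the data layout: int1 is written as 300 * i, and the writer above appears to be created with a row index stride of 1000 (the last createWriter argument, which is an assumption here), so predicate pushdown keeps or skips whole 1000-row groups. Illustrative arithmetic only:

// NOT(int1 < 300000) AND int1 < 600000 keeps 300000 <= 300 * i < 600000,
// i.e. rows 1000 <= i < 2000, matching the first loop in the test.
long lower = 300000L / 300;   // 1000
long upper = 600000L / 300;   // 2000
System.out.println("selected row range: [" + lower + ", " + upper + ")");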

Example 5 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From the class TestInputOutputFormat, method testSplitElimination:

@Test
public void testSplitElimination() throws Exception {
    Properties properties = new Properties();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    AbstractSerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    conf.setInt("mapred.max.split.size", 50);
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
            .lessThan("z", PredicateLeaf.Type.LONG, new Long(0))
        .end()
        .build();
    conf.set("sarg.pushdown", toKryo(sarg));
    conf.set("hive.io.file.readcolumn.names", "z,r");
    SerDeUtils.initializeSerDe(serde, conf, properties, null);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(0, splits.length);
}
Also used : SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) Properties(java.util.Properties) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) RecordWriter(org.apache.hadoop.mapred.RecordWriter) InputSplit(org.apache.hadoop.mapred.InputSplit) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
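
A hypothetical counterpart to the test, reusing its conf, in, and toKryo helper: a SARG that does match the written rows (every value written above is between 1 and 9), so split elimination should leave at least one split. This is a sketch under those assumptions, not part of the Hive test:

SearchArgument matching = SearchArgumentFactory.newBuilder()
    .startAnd()
        .lessThan("z", PredicateLeaf.Type.LONG, 10L)
    .end()
    .build();
conf.set("sarg.pushdown", toKryo(matching));
InputSplit[] kept = in.getSplits(conf, 1);
// Expected: kept.length > 0, since the file's column statistics overlap the predicate.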

Aggregations

SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument): 15
Test (org.junit.Test): 11
MessageType (org.apache.parquet.schema.MessageType): 7
FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate): 6
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 3
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 3
ByteBuffer (java.nio.ByteBuffer): 2
Properties (java.util.Properties): 2
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 2
AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 2
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 2
InputSplit (org.apache.hadoop.mapred.InputSplit): 2
RecordWriter (org.apache.hadoop.mapred.RecordWriter): 2
Kryo (com.esotericsoftware.kryo.Kryo): 1
Output (com.esotericsoftware.kryo.io.Output): 1
IOException (java.io.IOException): 1
ArrayList (java.util.ArrayList): 1
SplitInfos (org.apache.hadoop.hive.metastore.Metastore.SplitInfos): 1
PredicateLeaf (org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf): 1
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 1