Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
From class TestParquetRecordReaderWrapper, method testBuilder.
@Test
public void testBuilder() throws Exception {
  SearchArgument sarg = SearchArgumentFactory.newBuilder()
      .startNot()
      .startOr()
      .isNull("x", PredicateLeaf.Type.LONG)
      .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
      .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
      .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
      .end()
      .end()
      .build();
  MessageType schema = MessageTypeParser.parseMessageType(
      "message test { optional int32 x; required int32 y; required int32 z; optional binary a;}");
  FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
  String expected =
      "and(and(and(not(eq(x, null)), not(and(lteq(y, 20), not(lt(y, 10))))), not(or(or(eq(z, 1), "
          + "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
  assertEquals(expected, p.toString());
}
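The builder assembles an expression tree over a list of predicate leaves, and by the time the Parquet FilterPredicate is produced the NOT over the OR has been pushed down onto each leaf (De Morgan), which is why the expected string is an AND of negated leaves rather than a negated OR. As a minimal sketch of the builder on its own, using only classes that already appear in the test (the class name SargBuilderSketch and the main method are illustrative, not part of the Hive code):

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class SargBuilderSketch {
  public static void main(String[] args) {
    // NOT (x IS NULL), built with the same fluent API as the test above.
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startNot()
        .isNull("x", PredicateLeaf.Type.LONG)
        .end()
        .build();
    // A SearchArgument is an expression tree plus the leaves it references.
    System.out.println("expression: " + sarg.getExpression());
    System.out.println("leaves: " + sarg.getLeaves());
  }
}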
Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
From class TestParquetRecordReaderWrapper, method testBuilderFloat.
@Test
public void testBuilderFloat() throws Exception {
  SearchArgument sarg = SearchArgumentFactory.newBuilder()
      .startAnd()
      .lessThan("x", PredicateLeaf.Type.LONG, 22L)
      .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
      .lessThanEquals("y", PredicateLeaf.Type.STRING, new HiveChar("hi", 10).toString())
      .equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22))
      .equals("z1", PredicateLeaf.Type.FLOAT, new Double(0.22))
      .end()
      .build();
  MessageType schema = MessageTypeParser.parseMessageType(
      "message test { required int32 x; required int32 x1;"
          + " required binary y; required float z; required float z1;}");
  FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
  String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)),"
      + " lteq(y, Binary{\"hi        \"})), eq(z, "
      + "0.22)), eq(z1, 0.22))";
  assertEquals(expected, p.toString());
}
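Two details of this test are easy to miss: new HiveChar("hi", 10) is space-padded to its declared length, which is why the expected predicate compares against a padded binary value, and the FLOAT leaves are rendered as doubles (eq(z, 0.22)). A small sketch of the padding behaviour, assuming hive-common's HiveChar is on the classpath (the class name HiveCharPaddingSketch is illustrative):

import org.apache.hadoop.hive.common.type.HiveChar;

public class HiveCharPaddingSketch {
  public static void main(String[] args) {
    // HiveChar.toString() returns the value padded with spaces to the declared length.
    String padded = new HiveChar("hi", 10).toString();
    System.out.println("[" + padded + "]");  // prints [hi        ]
    System.out.println(padded.length());     // prints 10
  }
}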
Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
From class ExternalCache, method getSerializedSargForMetastore.
private ByteBuffer getSerializedSargForMetastore(boolean isOriginal) {
  if (sarg == null) {
    return null;
  }
  ByteBuffer serializedSarg = isOriginal ? sargIsOriginal : sargNotIsOriginal;
  if (serializedSarg != null) {
    return serializedSarg;
  }
  SearchArgument sarg2 = sarg;
  Kryo kryo = SerializationUtilities.borrowKryo();
  try {
    if ((isOriginal ? sargNotIsOriginal : sargIsOriginal) == null) {
      // Copy the sarg so the untranslated original stays available in case
      // we also need to serialize it for the other case later.
      sarg2 = kryo.copy(sarg2);
    }
    translateSargToTableColIndexes(sarg2, conf, OrcInputFormat.getRootColumn(isOriginal));
    ExternalCache.Baos baos = new Baos();
    Output output = new Output(baos);
    kryo.writeObject(output, sarg2);
    output.flush();
    serializedSarg = baos.get();
    if (isOriginal) {
      sargIsOriginal = serializedSarg;
    } else {
      sargNotIsOriginal = serializedSarg;
    }
  } finally {
    SerializationUtilities.releaseKryo(kryo);
  }
  return serializedSarg;
}
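The method keeps two cached serializations because OrcInputFormat.getRootColumn(isOriginal) yields a different root column for the two file layouts, and translateSargToTableColIndexes rewrites the sarg's column references in place; hence the kryo.copy when the other variant may still be needed. The read side is not shown in this snippet; a plausible counterpart, assuming the buffer is array-backed and the concrete class is SearchArgumentImpl (both assumptions, not confirmed by this code), could look like:

import java.nio.ByteBuffer;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;

import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl;

public class SargDeserializeSketch {
  // Hypothetical helper: rehydrate a SearchArgument from a buffer written with
  // kryo.writeObject(output, sarg), using the same pooled Kryo instances.
  static SearchArgument readSarg(ByteBuffer buf) {
    Kryo kryo = SerializationUtilities.borrowKryo();
    try {
      Input input = new Input(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
      return kryo.readObject(input, SearchArgumentImpl.class);
    } finally {
      SerializationUtilities.releaseKryo(kryo);
    }
  }
}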
Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
From class TestOrcFile, method testPredicatePushdown.
@Test
public void testPredicatePushdown() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
      400000L, CompressionKind.NONE, 500, 1000);
  for (int i = 0; i < 3500; ++i) {
    writer.addRow(new InnerStruct(i * 300, Integer.toHexString(10 * i)));
  }
  writer.close();
  Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(3500, reader.getNumberOfRows());
  SearchArgument sarg = SearchArgumentFactory.newBuilder()
      .startAnd()
      .startNot()
      .lessThan("int1", PredicateLeaf.Type.LONG, 300000L)
      .end()
      .lessThan("int1", PredicateLeaf.Type.LONG, 600000L)
      .end()
      .build();
  RecordReader rows = reader.rowsOptions(new Reader.Options()
      .range(0L, Long.MAX_VALUE)
      .include(new boolean[] { true, true, true })
      .searchArgument(sarg, new String[] { null, "int1", "string1" }));
  assertEquals(0L, rows.getRowNumber());
  OrcStruct row = null;
  for (int i = 1000; i < 2000; ++i) {
    assertTrue(rows.hasNext());
    assertEquals(i, rows.getRowNumber());
    row = (OrcStruct) rows.next(row);
    assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
    assertEquals(Integer.toHexString(10 * i), row.getFieldValue(1).toString());
  }
  assertTrue(!rows.hasNext());
  assertEquals(3500, rows.getRowNumber());
  // look through the file with no rows selected
  sarg = SearchArgumentFactory.newBuilder()
      .startAnd()
      .lessThan("int1", PredicateLeaf.Type.LONG, 0L)
      .end()
      .build();
  rows = reader.rowsOptions(new Reader.Options()
      .range(0L, Long.MAX_VALUE)
      .include(new boolean[] { true, true, true })
      .searchArgument(sarg, new String[] { null, "int1", "string1" }));
  assertTrue(!rows.hasNext());
  // select first 100 and last 100 rows
  sarg = SearchArgumentFactory.newBuilder()
      .startOr()
      .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 100)
      .startNot()
      .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 3400)
      .end()
      .end()
      .build();
  rows = reader.rowsOptions(new Reader.Options()
      .range(0L, Long.MAX_VALUE)
      .include(new boolean[] { true, true, true })
      .searchArgument(sarg, new String[] { null, "int1", "string1" }));
  row = null;
  for (int i = 0; i < 1000; ++i) {
    assertTrue(rows.hasNext());
    assertEquals(i, rows.getRowNumber());
    row = (OrcStruct) rows.next(row);
    assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
    assertEquals(Integer.toHexString(10 * i), row.getFieldValue(1).toString());
  }
  for (int i = 3000; i < 3500; ++i) {
    assertTrue(rows.hasNext());
    assertEquals(i, rows.getRowNumber());
    row = (OrcStruct) rows.next(row);
    assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
    assertEquals(Integer.toHexString(10 * i), row.getFieldValue(1).toString());
  }
  assertTrue(!rows.hasNext());
  assertEquals(3500, rows.getRowNumber());
}
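Two things are worth noting about the assertions. First, pushdown prunes at row-group granularity rather than per row: the file was written with 1,000-row index groups, so the last sarg, which only matches the first 100 and last 100 values, still yields rows 0-999 and 3000-3499. Second, the searchArgument call pairs the sarg with a column-name array aligned to the include flags, with null standing for the root struct. A condensed sketch of that wiring, using only calls that appear in the test (the helper name pushdownOptions is illustrative):

import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;

public class OrcSargOptionsSketch {
  // Illustrative helper: the reader options used for predicate pushdown in the test.
  static Reader.Options pushdownOptions(SearchArgument sarg) {
    // Which columns to read: index 0 is the root struct, then int1 and string1.
    boolean[] include = { true, true, true };
    // Names for the same positions, so sarg leaves can be matched to columns;
    // the root struct has no name, hence null.
    String[] sargColumns = { null, "int1", "string1" };
    return new Reader.Options()
        .range(0L, Long.MAX_VALUE)
        .include(include)
        .searchArgument(sarg, sargColumns);
  }
}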
Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
From class TestInputOutputFormat, method testSplitElimination.
@Test
public void testSplitElimination() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "z,r");
  properties.setProperty("columns.types", "int:struct<x:int,y:int>");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  OutputFormat<?, ?> outFormat = new OrcOutputFormat();
  conf.setInt("mapred.max.split.size", 50);
  RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
  writer.close(Reporter.NULL);
  serde = new OrcSerde();
  SearchArgument sarg = SearchArgumentFactory.newBuilder()
      .startAnd()
      .lessThan("z", PredicateLeaf.Type.LONG, new Long(0))
      .end()
      .build();
  conf.set("sarg.pushdown", toKryo(sarg));
  conf.set("hive.io.file.readcolumn.names", "z,r");
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(0, splits.length);
}
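Every value written above is positive, so the file's column statistics cannot satisfy z < 0 and getSplits() eliminates every split without ever opening a record reader. The handshake runs through two configuration keys: "sarg.pushdown" carries the serialized SearchArgument (produced here by the test's own toKryo helper), and "hive.io.file.readcolumn.names" names the columns the sarg refers to. A condensed sketch of that setup, assuming toKryo is reachable as a static helper on TestInputOutputFormat (the method name setupSargPushdown is illustrative):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;

public class SargPushdownConfSketch {
  // Hypothetical helper: wire a SearchArgument into the job configuration
  // the way the test above does.
  static void setupSargPushdown(JobConf conf, SearchArgument sarg, String readColumnNames) {
    // Serialized sarg that OrcInputFormat consults during getSplits()
    // when deciding whether a split can be eliminated.
    conf.set("sarg.pushdown", TestInputOutputFormat.toKryo(sarg));
    // Comma-separated column names the sarg's leaves refer to.
    conf.set("hive.io.file.readcolumn.names", readColumnNames);
  }
}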