Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
In class TestInputOutputFormat, method testSplitEliminationNullStats:
@Test
public void testSplitEliminationNullStats() throws Exception {
  Properties properties = new Properties();
  StructObjectInspector inspector = createSoi();
  AbstractSerDe serde = new OrcSerde();
  OutputFormat<?, ?> outFormat = new OrcOutputFormat();
  conf.setInt("mapred.max.split.size", 50);
  RecordWriter writer =
      outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
  writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
  writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
  writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
  writer.close(Reporter.NULL);
  serde = new OrcSerde();
  SearchArgument sarg = SearchArgumentFactory.newBuilder()
      .startAnd()
      .lessThan("z", PredicateLeaf.Type.STRING, new String("foo"))
      .end()
      .build();
  conf.set("sarg.pushdown", toKryo(sarg));
  conf.set("hive.io.file.readcolumn.names", "z");
  properties.setProperty("columns", "z");
  properties.setProperty("columns.types", "string");
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(0, splits.length);
}
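For context, here is a minimal sketch (not taken from the test above) of the fluent SearchArgument builder: predicate groups are opened with startAnd()/startOr(), each group is closed with end(), and the whole expression is finished with build(). The column names and literals below are illustrative only.

SearchArgument example = SearchArgumentFactory.newBuilder()
    .startAnd()
      .startOr()
        .lessThan("x", PredicateLeaf.Type.LONG, 10L)
        .equals("y", PredicateLeaf.Type.STRING, "hello")
      .end()
      .isNull("z", PredicateLeaf.Type.STRING)
    .end()
    .build();
// Yields an expression equivalent to: (x < 10 OR y = 'hello') AND z IS NULL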
Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
In class TestInputOutputFormat, method testSetSearchArgument:
@Test
public void testSetSearchArgument() throws Exception {
  Reader.Options options = new Reader.Options();
  List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
  OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
  builder.setKind(OrcProto.Type.Kind.STRUCT)
      .addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid", "row"))
      .addAllSubtypes(Arrays.asList(1, 2, 3, 4, 5, 6));
  types.add(builder.build());
  builder.clear().setKind(OrcProto.Type.Kind.INT);
  types.add(builder.build());
  types.add(builder.build());
  types.add(builder.build());
  types.add(builder.build());
  types.add(builder.build());
  builder.clear().setKind(OrcProto.Type.Kind.STRUCT)
      .addAllFieldNames(Arrays.asList("url", "purchase", "cost", "store"))
      .addAllSubtypes(Arrays.asList(7, 8, 9, 10));
  types.add(builder.build());
  builder.clear().setKind(OrcProto.Type.Kind.STRING);
  types.add(builder.build());
  builder.clear().setKind(OrcProto.Type.Kind.INT);
  types.add(builder.build());
  types.add(builder.build());
  types.add(builder.build());
  SearchArgument isNull = SearchArgumentFactory.newBuilder()
      .startAnd().isNull("cost", PredicateLeaf.Type.LONG).end().build();
  conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost");
  options.include(new boolean[] { true, true, false, true, false });
  OrcInputFormat.setSearchArgument(options, types, conf, false);
  String[] colNames = options.getColumnNames();
  assertEquals(null, colNames[0]);
  assertEquals("url", colNames[1]);
  assertEquals(null, colNames[2]);
  assertEquals("cost", colNames[3]);
  assertEquals(null, colNames[4]);
  SearchArgument arg = options.getSearchArgument();
  List<PredicateLeaf> leaves = arg.getLeaves();
  assertEquals("cost", leaves.get(0).getColumnName());
  assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
}
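As a rough usage note, the SARG pushed down through the configuration above can be read back and inspected with the same APIs the test exercises. A minimal sketch, assuming the configuration already carries a Kryo-serialized SARG (toKryo is a helper of this test class, not a public API):

SearchArgument restored = ConvertAstToSearchArg.createFromConf(conf);
if (restored != null) {
  for (PredicateLeaf leaf : restored.getLeaves()) {
    // e.g. prints "cost IS_NULL" for the predicate built above
    System.out.println(leaf.getColumnName() + " " + leaf.getOperator());
  }
}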
Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
In class ParquetRecordReaderBase, method setFilter:
public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
  SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
  if (sarg == null) {
    return null;
  }
  // Create the Parquet FilterPredicate without including columns that do not exist
  // on the schema (such as partition columns).
  FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
  if (p != null) {
    // Filter may have sensitive information. Do not send to debug.
    LOG.debug("PARQUET predicate push down generated.");
    ParquetInputFormat.setFilterPredicate(conf, p);
    return FilterCompat.get(p);
  } else {
    // Filter may have sensitive information. Do not send to debug.
    LOG.debug("No PARQUET predicate push down is generated.");
    return null;
  }
}
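To illustrate the conversion step in isolation, a hedged sketch against a hypothetical one-column Parquet schema; the schema string, column name, and literal are illustrative only, while ParquetFilterPredicateConverter is the same Hive class used above.

MessageType schema =
    MessageTypeParser.parseMessageType("message doc { optional int64 cost; }");
SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd().lessThan("cost", PredicateLeaf.Type.LONG, 100L).end().build();
// SARG columns missing from the schema are dropped; if nothing survives,
// the converter returns null and no Parquet filter is set.
FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);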
Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
In class OrcInputFormat, method setSearchArgument:
static void setSearchArgument(Reader.Options options, List<OrcProto.Type> types,
    Configuration conf, boolean isOriginal) {
  String neededColumnNames = getNeededColumnNamesString(conf);
  if (neededColumnNames == null) {
    LOG.debug("No ORC pushdown predicate - no column names");
    options.searchArgument(null, null);
    return;
  }
  SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
  if (sarg == null) {
    LOG.debug("No ORC pushdown predicate");
    options.searchArgument(null, null);
    return;
  }
  if (LOG.isInfoEnabled()) {
    LOG.info("ORC pushdown predicate: " + sarg);
  }
  options.searchArgument(sarg,
      getSargColumnNames(neededColumnNames.split(","), types, options.getInclude(), isOriginal));
}
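As the testSetSearchArgument example above shows, this method consumes two configuration entries: the projected column names and the Kryo-serialized SARG. A minimal sketch, assuming package-private access as in that test (toKryo being a test helper, not a public API):

conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost"); // "hive.io.file.readcolumn.names"
conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(sarg));            // "sarg.pushdown"
OrcInputFormat.setSearchArgument(options, types, conf, false);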
Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.
In class OrcFileMetadataHandler, method getFileMetadataByExpr:
@Override
public void getFileMetadataByExpr(List<Long> fileIds, byte[] expr, ByteBuffer[] metadatas,
    ByteBuffer[] results, boolean[] eliminated) throws IOException {
  SearchArgument sarg = getExpressionProxy().createSarg(expr);
  // For now, don't push anything into HBase, nor store anything special in HBase.
  if (metadatas == null) {
    // null means don't return metadata; we'd need the array anyway for now.
    metadatas = new ByteBuffer[results.length];
  }
  getStore().getFileMetadata(fileIds, metadatas);
  for (int i = 0; i < metadatas.length; ++i) {
    eliminated[i] = false;
    results[i] = null;
    if (metadatas[i] == null) {
      continue;
    }
    // Duplicate to avoid modification.
    ByteBuffer metadata = metadatas[i].duplicate();
    SplitInfos result = null;
    try {
      result = getFileFormatProxy().applySargToMetadata(sarg, metadata);
    } catch (IOException ex) {
      LOG.error("Failed to apply SARG to metadata", ex);
      metadatas[i] = null;
      continue;
    }
    eliminated[i] = (result == null);
    if (!eliminated[i]) {
      results[i] = ByteBuffer.wrap(result.toByteArray());
    }
  }
}