Use of org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf in project Hive by Apache.
The class OrcInputFormat, method pickStripesViaTranslatedSarg:
public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg,
    OrcFile.WriterVersion writerVersion, List<OrcProto.Type> types,
    List<StripeStatistics> stripeStats, int stripeCount) {
  LOG.info("Translated ORC pushdown predicate: " + sarg);
  assert sarg != null;
  if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
    // Only do split pruning if HIVE-8732 has been fixed in the writer.
    return null;
  }
  // Eliminate stripes that don't satisfy the predicate condition.
  List<PredicateLeaf> sargLeaves = sarg.getLeaves();
  int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves);
  TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
  SchemaEvolution evolution = new SchemaEvolution(schema, null);
  return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null, evolution);
}
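As a rough usage sketch, the caller builds a SARG whose leaves already carry translated column names (here produced with RecordReaderImpl.encodeTranslatedSargColumn) and passes stripe metadata from an open reader. The reader accessors shown are assumptions about the ORC Reader in use, not something the method above requires:

// Hypothetical sketch; the reader variable and its accessors are assumed.
String costCol = RecordReaderImpl.encodeTranslatedSargColumn(0, 2);
SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd().lessThan(costCol, PredicateLeaf.Type.LONG, 100L).end()
    .build();
boolean[] keep = OrcInputFormat.pickStripesViaTranslatedSarg(
    sarg,
    reader.getWriterVersion(),      // pruning is skipped for ORIGINAL writers
    reader.getTypes(),              // flattened OrcProto.Type list, root at index 0
    reader.getStripeStatistics(),   // null here disables pruning entirely
    reader.getStripes().size());
// A null result means "read everything"; otherwise keep[i] says whether stripe i survives.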
Use of org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf in project Hive by Apache.
The class ExternalCache, method translateSargToTableColIndexes:
/**
 * Modifies the SARG, replacing column names with column indexes in the target table schema.
 * This basically does the same thing as all the shenanigans with included columns, except
 * for the last step, where ORC gets the direct subtypes of the root column and uses the
 * ordered match to map table columns to file columns. The numbers put into the predicate
 * leaves make it possible to go into said subtypes directly by index to get the proper
 * index in the file.
 * This won't work with schema evolution, although it would probably be much easier to
 * reason about if schema evolution were to be supported, because this is a clear boundary
 * between table schema columns and all things ORC. None of the ORC stuff is used here and
 * none of the table schema stuff is used after that - ORC doesn't need a bunch of extra
 * crap to apply the SARG thus modified.
 */
public static void translateSargToTableColIndexes(
    SearchArgument sarg, Configuration conf, int rootColumn) {
  String nameStr = OrcInputFormat.getNeededColumnNamesString(conf),
      idStr = OrcInputFormat.getSargColumnIDsString(conf);
  String[] knownNames = nameStr.split(",");
  String[] idStrs = (idStr == null) ? null : idStr.split(",");
  assert idStrs == null || knownNames.length == idStrs.length;
  HashMap<String, Integer> nameIdMap = new HashMap<>();
  for (int i = 0; i < knownNames.length; ++i) {
    Integer newId = (idStrs != null) ? Integer.parseInt(idStrs[i]) : i;
    Integer oldId = nameIdMap.put(knownNames[i], newId);
    if (oldId != null && oldId.intValue() != newId.intValue()) {
      throw new RuntimeException("Multiple IDs for " + knownNames[i]
          + " in column strings: [" + idStr + "], [" + nameStr + "]");
    }
  }
  List<PredicateLeaf> leaves = sarg.getLeaves();
  for (int i = 0; i < leaves.size(); ++i) {
    PredicateLeaf pl = leaves.get(i);
    Integer colId = nameIdMap.get(pl.getColumnName());
    String newColName = RecordReaderImpl.encodeTranslatedSargColumn(rootColumn, colId);
    SearchArgumentFactory.setPredicateLeafColumn(pl, newColName);
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("SARG translated into " + sarg);
  }
}
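A small sketch of the translation in action, assuming the projection conf key is populated the way the test below populates it; the exact encoded name format is an internal detail of RecordReaderImpl.encodeTranslatedSargColumn, so it is not asserted here:

// Hypothetical sketch; the conf wiring mirrors the test below, not production setup.
Configuration conf = new Configuration();
conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost");
SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd().isNull("cost", PredicateLeaf.Type.LONG).end().build();
ExternalCache.translateSargToTableColIndexes(sarg, conf, /* rootColumn */ 0);
// The leaf no longer references "cost"; its column name is now the encoded
// root-column/table-index form that mapTranslatedSargColumns above understands.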
Use of org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf in project Hive by Apache.
The class TestInputOutputFormat, method testSetSearchArgument:
@Test
public void testSetSearchArgument() throws Exception {
  Reader.Options options = new Reader.Options();
  List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
  OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
  builder.setKind(OrcProto.Type.Kind.STRUCT)
      .addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid", "row"))
      .addAllSubtypes(Arrays.asList(1, 2, 3, 4, 5, 6));
  types.add(builder.build());
  builder.clear().setKind(OrcProto.Type.Kind.INT);
  types.add(builder.build());
  types.add(builder.build());
  types.add(builder.build());
  types.add(builder.build());
  types.add(builder.build());
  builder.clear().setKind(OrcProto.Type.Kind.STRUCT)
      .addAllFieldNames(Arrays.asList("url", "purchase", "cost", "store"))
      .addAllSubtypes(Arrays.asList(7, 8, 9, 10));
  types.add(builder.build());
  builder.clear().setKind(OrcProto.Type.Kind.STRING);
  types.add(builder.build());
  builder.clear().setKind(OrcProto.Type.Kind.INT);
  types.add(builder.build());
  types.add(builder.build());
  types.add(builder.build());
  SearchArgument isNull = SearchArgumentFactory.newBuilder()
      .startAnd().isNull("cost", PredicateLeaf.Type.LONG).end().build();
  conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost");
  options.include(new boolean[] { true, true, false, true, false });
  OrcInputFormat.setSearchArgument(options, types, conf, false);
  String[] colNames = options.getColumnNames();
  assertNull(colNames[0]);
  assertEquals("url", colNames[1]);
  assertNull(colNames[2]);
  assertEquals("cost", colNames[3]);
  assertNull(colNames[4]);
  SearchArgument arg = options.getSearchArgument();
  List<PredicateLeaf> leaves = arg.getLeaves();
  assertEquals("cost", leaves.get(0).getColumnName());
  assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
}
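For orientation, the flattened protobuf type list the test constructs corresponds to the ACID row wrapper around a four-column payload struct. As a readability aid (not part of the test), the same schema in org.apache.orc TypeDescription string form:

// Equivalent schema, expressed via TypeDescription.fromString.
TypeDescription schema = TypeDescription.fromString(
    "struct<op:int,otid:int,bucket:int,rowid:int,ctid:int,"
        + "row:struct<url:string,purchase:int,cost:int,store:int>>");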
Use of org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf in project Hive by Apache.
The class OrcInputFormat, method isStripeSatisfyPredicate:
private static boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
    SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) {
  List<PredicateLeaf> predLeaves = sarg.getLeaves();
  TruthValue[] truthValues = new TruthValue[predLeaves.size()];
  for (int pred = 0; pred < truthValues.length; pred++) {
    if (filterColumns[pred] != -1) {
      if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) {
        truthValues[pred] = TruthValue.YES_NO_NULL;
      } else {
        // Column statistics at index 0 contain only the number of rows.
        ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
        truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
      }
    } else {
      // Partition column case: the partition filter is evaluated by the
      // partition pruner, so we do not evaluate it here.
      truthValues[pred] = TruthValue.YES_NO_NULL;
    }
  }
  return sarg.evaluate(truthValues).isNeeded();
}
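The final evaluate/isNeeded step combines the per-leaf truth values through the SARG's boolean expression. A minimal sketch of that contract, with illustrative column names:

// Illustrative sketch of sarg.evaluate(...).isNeeded(); not tied to any stripe.
SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd()
        .lessThan("x", PredicateLeaf.Type.LONG, 10L)
        .isNull("y", PredicateLeaf.Type.STRING)
    .end().build();
// One TruthValue per leaf, in sarg.getLeaves() order.
TruthValue[] leafValues = { TruthValue.NO, TruthValue.YES_NO_NULL };
// AND with a definite NO yields NO, so a stripe with these stats can be skipped.
assert !sarg.evaluate(leafValues).isNeeded();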