Search in sources:

Example 1 with PredicateLeaf

use of org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf in project hive by apache.

In class OrcInputFormat, the method pickStripesViaTranslatedSarg:

/**
 * Selects the stripes to read after a SARG has been translated to file-column form.
 *
 * @param sarg the translated search argument; must not be null
 * @param writerVersion version of the writer that produced the file
 * @param types the file's type tree in protobuf form
 * @param stripeStats per-stripe column statistics, or null if unavailable
 * @param stripeCount number of stripes in the file
 * @return a boolean per stripe (true = read it), or null when pruning is not possible
 */
public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg, OrcFile.WriterVersion writerVersion, List<OrcProto.Type> types, List<StripeStatistics> stripeStats, int stripeCount) {
    // Check the precondition before the value is used, so that with -ea the
    // assertion fires ahead of the log statement rather than after it.
    assert sarg != null;
    LOG.info("Translated ORC pushdown predicate: " + sarg);
    if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
        // only do split pruning if HIVE-8732 has been fixed in the writer;
        // null tells the caller that no stripe elimination was performed
        return null;
    }
    // eliminate stripes that don't satisfy the predicate condition
    List<PredicateLeaf> sargLeaves = sarg.getLeaves();
    int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves);
    TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
    // No reader schema here: evolution is identity (file schema vs. itself).
    SchemaEvolution evolution = new SchemaEvolution(schema, null);
    return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null, evolution);
}
Also used : PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) TypeDescription(org.apache.orc.TypeDescription) SchemaEvolution(org.apache.orc.impl.SchemaEvolution)

Example 2 with PredicateLeaf

use of org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf in project hive by apache.

In class ExternalCache, the method translateSargToTableColIndexes:

/**
   * Modifies the SARG, replacing column names with column indexes in target table schema. This
   * basically does the same thing as all the shennannigans with included columns, except for the
   * last step where ORC gets direct subtypes of root column and uses the ordered match to map
   * table columns to file columns. The numbers put into predicate leaf should allow to go into
   * said subtypes directly by index to get the proper index in the file.
   * This won't work with schema evolution, although it's probably much easier to reason about
   * if schema evolution was to be supported, because this is a clear boundary between table
   * schema columns and all things ORC. None of the ORC stuff is used here and none of the
   * table schema stuff is used after that - ORC doesn't need a bunch of extra crap to apply
   * the SARG thus modified.
   */
/**
 * Rewrites the SARG in place, substituting each leaf's column name with the column's
 * index in the target table schema. This mirrors the shenanigans normally done with
 * included columns, minus the final step where ORC walks the direct subtypes of the
 * root column and maps table columns to file columns by ordered match. The indexes
 * written into the predicate leaves let a reader jump straight into those subtypes to
 * reach the proper file column.
 * Schema evolution is not supported by this path, though the boundary it draws —
 * table-schema concepts on one side, ORC concepts on the other — would make such
 * support easier to reason about: nothing ORC-specific is consumed here, and nothing
 * table-schema-specific is consumed afterwards, so ORC can apply the rewritten SARG
 * without any extra baggage.
 */
public static void translateSargToTableColIndexes(SearchArgument sarg, Configuration conf, int rootColumn) {
    String nameStr = OrcInputFormat.getNeededColumnNamesString(conf);
    String idStr = OrcInputFormat.getSargColumnIDsString(conf);
    String[] knownNames = nameStr.split(",");
    String[] idStrs = (idStr == null) ? null : idStr.split(",");
    assert idStrs == null || knownNames.length == idStrs.length;
    // Build name -> table-column-index, detecting contradictory duplicate entries.
    HashMap<String, Integer> nameIdMap = new HashMap<>();
    for (int ix = 0; ix < knownNames.length; ++ix) {
        // Without explicit IDs, the position in the name list is the ID.
        Integer candidateId = (idStrs == null) ? ix : Integer.parseInt(idStrs[ix]);
        Integer previousId = nameIdMap.put(knownNames[ix], candidateId);
        if (previousId != null && previousId.intValue() != candidateId.intValue()) {
            throw new RuntimeException("Multiple IDs for " + knownNames[ix] + " in column strings: [" + idStr + "], [" + nameStr + "]");
        }
    }
    // Replace every leaf's column name with its encoded file-column reference.
    for (PredicateLeaf leaf : sarg.getLeaves()) {
        Integer colId = nameIdMap.get(leaf.getColumnName());
        String encodedName = RecordReaderImpl.encodeTranslatedSargColumn(rootColumn, colId);
        SearchArgumentFactory.setPredicateLeafColumn(leaf, encodedName);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("SARG translated into " + sarg);
    }
}
Also used : HashMap(java.util.HashMap) PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf)

Example 3 with PredicateLeaf

use of org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf in project hive by apache.

In class TestInputOutputFormat, the method testSetSearchArgument:

@Test
public void testSetSearchArgument() throws Exception {
    Reader.Options options = new Reader.Options();
    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
    builder.setKind(OrcProto.Type.Kind.STRUCT).addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid", "row")).addAllSubtypes(Arrays.asList(1, 2, 3, 4, 5, 6));
    types.add(builder.build());
    builder.clear().setKind(OrcProto.Type.Kind.INT);
    types.add(builder.build());
    types.add(builder.build());
    types.add(builder.build());
    types.add(builder.build());
    types.add(builder.build());
    builder.clear().setKind(OrcProto.Type.Kind.STRUCT).addAllFieldNames(Arrays.asList("url", "purchase", "cost", "store")).addAllSubtypes(Arrays.asList(7, 8, 9, 10));
    types.add(builder.build());
    builder.clear().setKind(OrcProto.Type.Kind.STRING);
    types.add(builder.build());
    builder.clear().setKind(OrcProto.Type.Kind.INT);
    types.add(builder.build());
    types.add(builder.build());
    types.add(builder.build());
    SearchArgument isNull = SearchArgumentFactory.newBuilder().startAnd().isNull("cost", PredicateLeaf.Type.LONG).end().build();
    conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost");
    options.include(new boolean[] { true, true, false, true, false });
    OrcInputFormat.setSearchArgument(options, types, conf, false);
    String[] colNames = options.getColumnNames();
    assertEquals(null, colNames[0]);
    assertEquals("url", colNames[1]);
    assertEquals(null, colNames[2]);
    assertEquals("cost", colNames[3]);
    assertEquals(null, colNames[4]);
    SearchArgument arg = options.getSearchArgument();
    List<PredicateLeaf> leaves = arg.getLeaves();
    assertEquals("cost", leaves.get(0).getColumnName());
    assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
}
Also used : OrcProto(org.apache.orc.OrcProto) ArrayList(java.util.ArrayList) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) Test(org.junit.Test)

Example 4 with PredicateLeaf

use of org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf in project hive by apache.

In class OrcInputFormat, the method isStripeSatisfyPredicate:

/**
 * Evaluates the SARG against one stripe's column statistics and reports whether
 * the stripe could contain matching rows (i.e. whether it must be read).
 * A leaf whose column cannot be evaluated is scored YES_NO_NULL (unknown),
 * which keeps the stripe rather than wrongly pruning it.
 */
private static boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) {
    List<PredicateLeaf> predLeaves = sarg.getLeaves();
    TruthValue[] truthValues = new TruthValue[predLeaves.size()];
    for (int pred = 0; pred < truthValues.length; pred++) {
        if (filterColumns[pred] != -1) {
            // A conversion that is not PPD-safe could make stats unreliable,
            // so treat the leaf as unknown instead of evaluating it.
            if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) {
                truthValues[pred] = TruthValue.YES_NO_NULL;
            } else {
                // column statistics at index 0 contains only the number of rows
                // (presumably the root struct's stats — per-column stats start at 1;
                // filterColumns already carries the proper index)
                ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
                truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
            }
        } else {
            // partition column case.
            // partition filter will be evaluated by partition pruner so
            // we will not evaluate partition filter here.
            truthValues[pred] = TruthValue.YES_NO_NULL;
        }
    }
    return sarg.evaluate(truthValues).isNeeded();
}
Also used : ColumnStatistics(org.apache.orc.ColumnStatistics) PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) TruthValue(org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue)

Aggregations

PredicateLeaf (org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf)4 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument)1 TruthValue (org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue)1 ColumnStatistics (org.apache.orc.ColumnStatistics)1 OrcProto (org.apache.orc.OrcProto)1 TypeDescription (org.apache.orc.TypeDescription)1 SchemaEvolution (org.apache.orc.impl.SchemaEvolution)1 Test (org.junit.Test)1