Example 6 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From class TestInputOutputFormat, method testSplitEliminationNullStats.

@Test
public void testSplitEliminationNullStats() throws Exception {
    Properties properties = new Properties();
    StructObjectInspector inspector = createSoi();
    AbstractSerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    conf.setInt("mapred.max.split.size", 50);
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    // Write three rows whose single column is null, so the file's column
    // statistics record no min or max values.
    writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
    writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
    writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("z", PredicateLeaf.Type.STRING, "foo")
        .end()
        .build();
    conf.set("sarg.pushdown", toKryo(sarg));
    conf.set("hive.io.file.readcolumn.names", "z");
    properties.setProperty("columns", "z");
    properties.setProperty("columns.types", "string");
    SerDeUtils.initializeSerDe(serde, conf, properties, null);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    // Every value of z is null, so z < 'foo' can never match and all splits are eliminated.
    assertEquals(0, splits.length);
}
Also used: SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) Properties(java.util.Properties) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) RecordWriter(org.apache.hadoop.mapred.RecordWriter) InputSplit(org.apache.hadoop.mapred.InputSplit) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
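
The toKryo helper called above is defined elsewhere in TestInputOutputFormat and is not shown on this page. A minimal sketch of what it does, consistent with the Kryo and Output entries under Aggregations below: it serializes the SearchArgument with Kryo and Base64-encodes the bytes so the predicate can travel through the Configuration as a String. The buffer sizes here are illustrative assumptions.

import org.apache.commons.codec.binary.Base64;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Output;

// Sketch: serialize a SearchArgument into a Base64 string suitable for conf.set(...).
public static String toKryo(SearchArgument sarg) {
    Output out = new Output(4 * 1024, 10 * 1024 * 1024);
    new Kryo().writeObject(out, sarg);
    out.close();
    return Base64.encodeBase64String(out.toBytes());
}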

Example 7 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From class TestInputOutputFormat, method testSetSearchArgument.

@Test
public void testSetSearchArgument() throws Exception {
    Reader.Options options = new Reader.Options();
    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
    // Type 0: the ACID root struct <op, otid, bucket, rowid, ctid, row>.
    builder.setKind(OrcProto.Type.Kind.STRUCT)
        .addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid", "row"))
        .addAllSubtypes(Arrays.asList(1, 2, 3, 4, 5, 6));
    types.add(builder.build());
    // Types 1-5: the five INT metadata columns (op, otid, bucket, rowid, ctid).
    builder.clear().setKind(OrcProto.Type.Kind.INT);
    types.add(builder.build());
    types.add(builder.build());
    types.add(builder.build());
    types.add(builder.build());
    types.add(builder.build());
    // Type 6: the user row struct <url, purchase, cost, store>.
    builder.clear().setKind(OrcProto.Type.Kind.STRUCT)
        .addAllFieldNames(Arrays.asList("url", "purchase", "cost", "store"))
        .addAllSubtypes(Arrays.asList(7, 8, 9, 10));
    types.add(builder.build());
    // Type 7: url is a STRING column.
    builder.clear().setKind(OrcProto.Type.Kind.STRING);
    types.add(builder.build());
    // Types 8-10: purchase, cost, store are INT columns.
    builder.clear().setKind(OrcProto.Type.Kind.INT);
    types.add(builder.build());
    types.add(builder.build());
    types.add(builder.build());
    SearchArgument isNull = SearchArgumentFactory.newBuilder()
        .startAnd()
        .isNull("cost", PredicateLeaf.Type.LONG)
        .end()
        .build();
    conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost");
    // One include flag per column position; url (index 1) and cost (index 3) are read.
    options.include(new boolean[] { true, true, false, true, false });
    OrcInputFormat.setSearchArgument(options, types, conf, false);
    String[] colNames = options.getColumnNames();
    assertNull(colNames[0]);
    assertEquals("url", colNames[1]);
    assertNull(colNames[2]);
    assertEquals("cost", colNames[3]);
    assertNull(colNames[4]);
    SearchArgument arg = options.getSearchArgument();
    List<PredicateLeaf> leaves = arg.getLeaves();
    assertEquals("cost", leaves.get(0).getColumnName());
    assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
}
Also used: OrcProto(org.apache.orc.OrcProto) ArrayList(java.util.ArrayList) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) Test(org.junit.Test)
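
The fluent builder used in these tests nests freely. As a small illustrative sketch, reusing column names from the synthetic schema above with an arbitrary literal, a compound predicate cost < 100 AND url IS NOT NULL would be built as:

// Sketch only: startNot()/end() wraps a negation around the enclosed leaf.
SearchArgument compound = SearchArgumentFactory.newBuilder()
    .startAnd()
        .lessThan("cost", PredicateLeaf.Type.LONG, 100L)
        .startNot()
            .isNull("url", PredicateLeaf.Type.STRING)
        .end()
    .end()
    .build();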

Example 8 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From class ParquetRecordReaderBase, method setFilter.

public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
    SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
    if (sarg == null) {
        return null;
    }
    // Create the Parquet FilterPredicate without including columns that do not exist
    // on the schema (such as partition columns).
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    if (p != null) {
        // The filter may contain sensitive literals, so log only that a predicate
        // was generated, never the predicate itself.
        LOG.debug("PARQUET predicate push down generated.");
        ParquetInputFormat.setFilterPredicate(conf, p);
        return FilterCompat.get(p);
    } else {
        // Same reasoning: record the outcome without logging the filter contents.
        LOG.debug("No PARQUET predicate push down is generated.");
        return null;
    }
}
Also used: SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate)
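
ConvertAstToSearchArg.createFromConf reads the serialized predicate back out of the job configuration, matching the property and encoding that Example 7 sets. A hedged caller-side sketch, where jobConf, fileSchema, and readerBase are hypothetical variables:

// Plant a SearchArgument in the JobConf the same way Example 7 does, then let
// setFilter convert it into a Parquet FilterPredicate and register it.
SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd().lessThan("cost", PredicateLeaf.Type.LONG, 100L).end().build();
jobConf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(sarg)); // toKryo as sketched under Example 6
FilterCompat.Filter filter = readerBase.setFilter(jobConf, fileSchema);
// filter == null means either no predicate was configured or none of its
// columns exist in the file schema; the read then proceeds unfiltered.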

Example 9 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From class OrcInputFormat, method setSearchArgument.

static void setSearchArgument(Reader.Options options, List<OrcProto.Type> types, Configuration conf, boolean isOriginal) {
    String neededColumnNames = getNeededColumnNamesString(conf);
    if (neededColumnNames == null) {
        LOG.debug("No ORC pushdown predicate - no column names");
        options.searchArgument(null, null);
        return;
    }
    SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
    if (sarg == null) {
        LOG.debug("No ORC pushdown predicate");
        options.searchArgument(null, null);
        return;
    }
    if (LOG.isInfoEnabled()) {
        LOG.info("ORC pushdown predicate: " + sarg);
    }
    options.searchArgument(sarg, getSargColumnNames(neededColumnNames.split(","), types, options.getInclude(), isOriginal));
}
Also used: SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument)
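
Example 7 above drives this method directly. As a minimal sketch, the two configuration inputs it requires (keys taken from that test) are the projected column names and the serialized predicate; if either is missing, the method clears the pushdown via options.searchArgument(null, null) and returns:

conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost");
conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(sarg)); // sarg built as in Example 7
OrcInputFormat.setSearchArgument(options, types, conf, false);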

Example 10 with SearchArgument

Use of org.apache.hadoop.hive.ql.io.sarg.SearchArgument in project hive by apache.

From class OrcFileMetadataHandler, method getFileMetadataByExpr.

@Override
public void getFileMetadataByExpr(List<Long> fileIds, byte[] expr, ByteBuffer[] metadatas, ByteBuffer[] results, boolean[] eliminated) throws IOException {
    SearchArgument sarg = getExpressionProxy().createSarg(expr);
    // For now, don't push anything into HBase, nor store anything special in HBase
    if (metadatas == null) {
        // null means don't return metadata; we'd need the array anyway for now.
        metadatas = new ByteBuffer[results.length];
    }
    getStore().getFileMetadata(fileIds, metadatas);
    for (int i = 0; i < metadatas.length; ++i) {
        eliminated[i] = false;
        results[i] = null;
        if (metadatas[i] == null) {
            continue;
        }
        // Duplicate to avoid modification.
        ByteBuffer metadata = metadatas[i].duplicate();
        SplitInfos result = null;
        try {
            result = getFileFormatProxy().applySargToMetadata(sarg, metadata);
        } catch (IOException ex) {
            LOG.error("Failed to apply SARG to metadata", ex);
            metadatas[i] = null;
            continue;
        }
        eliminated[i] = (result == null);
        if (!eliminated[i]) {
            results[i] = ByteBuffer.wrap(result.toByteArray());
        }
    }
}
Also used: SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) IOException(java.io.IOException) SplitInfos(org.apache.hadoop.hive.metastore.Metastore.SplitInfos) ByteBuffer(java.nio.ByteBuffer)
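
A hedged sketch of how a caller might consume the three output arrays; the loop is illustrative rather than taken from Hive, and ByteString here is com.google.protobuf.ByteString:

// eliminated[i] == true: the SARG proved file i contains no matching rows.
// results[i] != null: serialized SplitInfos naming the stripes worth reading.
// both false/null: metadata was unavailable, so the whole file must be read.
for (int i = 0; i < fileIds.size(); ++i) {
    if (eliminated[i]) {
        continue;
    }
    if (results[i] != null) {
        SplitInfos infos = SplitInfos.parseFrom(ByteString.copyFrom(results[i]));
        // Schedule only the stripes listed in infos.
    }
}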

Aggregations

SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument) 15
Test (org.junit.Test) 11
MessageType (org.apache.parquet.schema.MessageType) 7
FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate) 6
HiveChar (org.apache.hadoop.hive.common.type.HiveChar) 3
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 3
ByteBuffer (java.nio.ByteBuffer) 2
Properties (java.util.Properties) 2
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar) 2
AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe) 2
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 2
InputSplit (org.apache.hadoop.mapred.InputSplit) 2
RecordWriter (org.apache.hadoop.mapred.RecordWriter) 2
Kryo (com.esotericsoftware.kryo.Kryo) 1
Output (com.esotericsoftware.kryo.io.Output) 1
IOException (java.io.IOException) 1
ArrayList (java.util.ArrayList) 1
SplitInfos (org.apache.hadoop.hive.metastore.Metastore.SplitInfos) 1
PredicateLeaf (org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) 1
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) 1