Example 11 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestOrcSplitElimination, method testFooterExternalCacheImpl:

private void testFooterExternalCacheImpl(boolean isPpd) throws IOException {
    ObjectInspector inspector = createIO();
    writeFile(inspector, testFilePath);
    writeFile(inspector, testFilePath2);
    GenericUDF udf = new GenericUDFOPEqualOrLessThan();
    List<ExprNodeDesc> childExpr = Lists.newArrayList();
    createTestSarg(inspector, udf, childExpr);
    setupExternalCacheConfig(isPpd, testFilePath + "," + testFilePath2);
    // Get the base values w/o cache.
    conf.setBoolean(ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED.varname, false);
    OrcInputFormatForTest.clearLocalCache();
    OrcInputFormat in0 = new OrcInputFormat();
    InputSplit[] originals = in0.getSplits(conf, -1);
    assertEquals(10, originals.length);
    HashSet<FsWithHash> originalHs = new HashSet<>();
    for (InputSplit original : originals) {
        originalHs.add(new FsWithHash((FileSplit) original));
    }
    // Populate the cache.
    conf.setBoolean(ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED.varname, true);
    OrcInputFormatForTest in = new OrcInputFormatForTest();
    OrcInputFormatForTest.clearLocalCache();
    OrcInputFormatForTest.caches.resetCounts();
    OrcInputFormatForTest.caches.cache.clear();
    InputSplit[] splits = in.getSplits(conf, -1);
    // Puts, gets, hits, unused, unused.
    @SuppressWarnings("static-access")
    AtomicInteger[] counts = {
        in.caches.putCount,
        isPpd ? in.caches.getByExprCount : in.caches.getCount,
        isPpd ? in.caches.getHitByExprCount : in.caches.getHitCount,
        isPpd ? in.caches.getCount : in.caches.getByExprCount,
        isPpd ? in.caches.getHitCount : in.caches.getHitByExprCount
    };
    verifySplits(originalHs, splits);
    verifyCallCounts(counts, 2, 2, 0);
    assertEquals(2, OrcInputFormatForTest.caches.cache.size());
    // Verify we can get from cache.
    OrcInputFormatForTest.clearLocalCache();
    OrcInputFormatForTest.caches.resetCounts();
    splits = in.getSplits(conf, -1);
    verifySplits(originalHs, splits);
    verifyCallCounts(counts, 0, 2, 2);
    // Verify ORC SARG still works.
    OrcInputFormatForTest.clearLocalCache();
    OrcInputFormatForTest.caches.resetCounts();
    childExpr.set(1, new ExprNodeConstantDesc(5));
    conf.set("hive.io.filter.expr.serialized", SerializationUtilities.serializeExpression(new ExprNodeGenericFuncDesc(inspector, udf, childExpr)));
    splits = in.getSplits(conf, -1);
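    // With the constant lowered to 5, only splits 0, 4, 5 and 9 (the first and
    // last of each file's five splits) are expected to survive elimination.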
    InputSplit[] filtered = { originals[0], originals[4], originals[5], originals[9] };
    originalHs = new HashSet<>();
    for (InputSplit original : filtered) {
        originalHs.add(new FsWithHash((FileSplit) original));
    }
    verifySplits(originalHs, splits);
    verifyCallCounts(counts, 0, 2, 2);
    // Verify corrupted cache value gets replaced.
    OrcInputFormatForTest.clearLocalCache();
    OrcInputFormatForTest.caches.resetCounts();
    Map.Entry<Long, MockExternalCaches.MockItem> e = OrcInputFormatForTest.caches.cache.entrySet().iterator().next();
    Long key = e.getKey();
    byte[] someData = new byte[8];
    ByteBuffer toCorrupt = e.getValue().data;
    System.arraycopy(toCorrupt.array(), toCorrupt.arrayOffset(), someData, 0, someData.length);
    toCorrupt.putLong(0, 0L);
    splits = in.getSplits(conf, -1);
    verifySplits(originalHs, splits);
    if (!isPpd) {
        // Recovery is not implemented yet for PPD path.
        ByteBuffer restored = OrcInputFormatForTest.caches.cache.get(key).data;
        byte[] newData = new byte[someData.length];
        System.arraycopy(restored.array(), restored.arrayOffset(), newData, 0, newData.length);
        assertArrayEquals(someData, newData);
    }
}
Also used: GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), FileSplit (org.apache.hadoop.mapred.FileSplit), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), InputSplit (org.apache.hadoop.mapred.InputSplit), HashSet (java.util.HashSet), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), ByteBuffer (java.nio.ByteBuffer), GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashMap (java.util.HashMap)
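The recurring pattern in these examples: build a comparison ExprNodeGenericFuncDesc from an ExprNodeColumnDesc and an ExprNodeConstantDesc, serialize it with SerializationUtilities.serializeExpression, and set it in the job configuration under TableScanDesc.FILTER_EXPR_CONF_STR (the "hive.io.filter.expr.serialized" key the test above uses directly), so the input format can push it down. A minimal sketch of that pattern; the column name someCol and the helper name pushDownFilter are illustrative, not taken from the tests:

import java.util.List;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.JobConf;
import com.google.common.collect.Lists;

// Hypothetical helper distilling the pattern above: push "someCol <= 10" into the conf.
static void pushDownFilter(JobConf conf) {
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "someCol", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(10);
    List<ExprNodeDesc> children = Lists.newArrayList(column, constant);
    ExprNodeGenericFuncDesc filter =
        new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    // Same conf key the ORC test sets via the string literal "hive.io.filter.expr.serialized".
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, SerializationUtilities.serializeExpression(filter));
}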

Example 12 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestParquetRowGroupFilter, method testRowGroupFilterTakeEffect:

@Test
public void testRowGroupFilterTakeEffect() throws Exception {
    // define schema
    columnNames = "intCol";
    columnTypes = "int";
    StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes);
    MessageType fileSchema = MessageTypeParser.parseMessageType("message hive_schema {\n" + "  optional int32 intCol;\n" + "}\n");
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "intCol");
    conf.set("columns", "intCol");
    conf.set("columns.types", "int");
    // create Parquet file with specific data
    Path testPath = writeDirect("RowGroupFilterTakeEffect", fileSchema, new DirectWriter() {

        @Override
        public void write(RecordConsumer consumer) {
            for (int i = 0; i < 100; i++) {
                consumer.startMessage();
                consumer.startField("int", 0);
                consumer.addInteger(i);
                consumer.endField("int", 0);
                consumer.endMessage();
            }
        }
    });
    // > 50
    GenericUDF udf = new GenericUDFOPGreaterThan();
    List<ExprNodeDesc> children = Lists.newArrayList();
    ExprNodeColumnDesc columnDesc = new ExprNodeColumnDesc(Integer.class, "intCol", "T", false);
    ExprNodeConstantDesc constantDesc = new ExprNodeConstantDesc(50);
    children.add(columnDesc);
    children.add(constantDesc);
    ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
    String searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
    ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat()
        .getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
    Assert.assertEquals("row group is not filtered correctly", 1, recordReader.getFiltedBlocks().size());
    // > 100
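    // The file only contains the values 0..99, so no row group can match > 100
    // and none should survive the filter.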
    constantDesc = new ExprNodeConstantDesc(100);
    children.set(1, constantDesc);
    genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children);
    searchArgumentStr = SerializationUtilities.serializeExpression(genericFuncDesc);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr);
    recordReader = (ParquetRecordReaderWrapper) new MapredParquetInputFormat()
        .getRecordReader(new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null);
    Assert.assertEquals("row group is not filtered correctly", 0, recordReader.getFiltedBlocks().size());
}
Also used: Path (org.apache.hadoop.fs.Path), GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), ParquetRecordReaderWrapper (org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper), RecordConsumer (org.apache.parquet.io.api.RecordConsumer), FileSplit (org.apache.hadoop.mapred.FileSplit), GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), MessageType (org.apache.parquet.schema.MessageType), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)

Example 13 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestColumnPrunerProcCtx, method testGetSelectNestedColPathsFromChildren5:

// Test select named_struct from named_struct:struct<a:boolean,b:double>
@Test
public void testGetSelectNestedColPathsFromChildren5() {
    ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null);
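    // The named_struct arguments are modeled purely as constant descriptors;
    // no column references appear anywhere in this expression tree.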
    ExprNodeConstantDesc constADesc = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, "a");
    ExprNodeConstantDesc constBDesc = new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, "b");
    List<ExprNodeDesc> list = new ArrayList<>();
    list.add(constADesc);
    list.add(constBDesc);
    GenericUDF udf = mock(GenericUDF.class);
    ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(col1Type, udf, "named_struct", list);
    ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, funcDesc, "foo", false);
    final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0"));
    SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths);
    List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths);
    // Expect an empty result, since only constant descriptors exist
    assertEquals(0, groups.size());
}
Also used: ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc), ArrayList (java.util.ArrayList), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), Test (org.junit.Test)

Example 14 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestAccumuloRangeGenerator, method testRangeConjunctionWithDisjunction:

@Test
public void testRangeConjunctionWithDisjunction() throws Exception {
    // rowId >= 'h'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "h");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // rowId <= 'd'
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "d");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);
    assertNotNull(node2);
    // rowId >= 'q'
    ExprNodeDesc column3 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant3 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "q");
    List<ExprNodeDesc> children3 = Lists.newArrayList();
    children3.add(column3);
    children3.add(constant3);
    ExprNodeDesc node3 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children3);
    assertNotNull(node3);
    // Or UDF, (rowId <= 'd' or rowId >= 'q')
    List<ExprNodeDesc> orFilters = Lists.newArrayList();
    orFilters.add(node2);
    orFilters.add(node3);
    ExprNodeGenericFuncDesc orNode = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPOr(), orFilters);
    // And UDF, (rowId >= 'h' and (rowId <= 'd' or rowId >= 'q'))
    List<ExprNodeDesc> andFilters = Lists.newArrayList();
    andFilters.add(node);
    andFilters.add(orNode);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), andFilters);
    // Should generate ['q', +inf)
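    // The disjunct rowId <= 'd' cannot hold together with rowId >= 'h', so the
    // whole conjunction collapses to rowId >= 'q'.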
    List<Range> expectedRanges = Arrays.asList(new Range(new Key("q"), true, null, false));
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    Object result = nodeOutput.get(both);
    Assert.assertNotNull(result);
    Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
    @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
    Assert.assertEquals(expectedRanges, actualRanges);
}
Also used: GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), HashMap (java.util.HashMap), DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker), Node (org.apache.hadoop.hive.ql.lib.Node), ArrayList (java.util.ArrayList), Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher), DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), List (java.util.List), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), Range (org.apache.accumulo.core.data.Range), GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan), GenericUDFOPOr (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr), Key (org.apache.accumulo.core.data.Key), GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd), Test (org.junit.Test)

Example 15 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestAccumuloRangeGenerator, method testRangeConjunction:

@Test
public void testRangeConjunction() throws Exception {
    // rowId >= 'f'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // rowId <= 'm'
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);
    assertNotNull(node2);
    // And UDF
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    // Should generate [f,m]
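    // The end key "m\0" with exclusive=false is the immediate successor of 'm',
    // so the range is effectively inclusive of 'm'.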
    List<Range> expectedRanges = Arrays.asList(new Range(new Key("f"), true, new Key("m\0"), false));
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    Object result = nodeOutput.get(both);
    Assert.assertNotNull(result);
    Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
    @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
    Assert.assertEquals(expectedRanges, actualRanges);
}
Also used: GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), HashMap (java.util.HashMap), DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker), Node (org.apache.hadoop.hive.ql.lib.Node), ArrayList (java.util.ArrayList), Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher), DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), List (java.util.List), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), Range (org.apache.accumulo.core.data.Range), GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan), Key (org.apache.accumulo.core.data.Key), GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd), Test (org.junit.Test)

Aggregations

ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 111 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 101 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 80 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 78 usages
Test (org.junit.Test): 52 usages
ArrayList (java.util.ArrayList): 47 usages
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 19 usages
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 18 usages
DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression): 17 usages
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 17 usages
HashMap (java.util.HashMap): 15 usages
Range (org.apache.accumulo.core.data.Range): 15 usages
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 15 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 15 usages
VectorUDAFMaxString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString): 13 usages
VectorUDAFMinString (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString): 13 usages
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 12 usages
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan): 12 usages
Key (org.apache.accumulo.core.data.Key): 11 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 11 usages