Example 76 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestVectorGroupByOperator, method testRollupAggregationWithFlush.

@Test
public void testRollupAggregationWithFlush() throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("k1");
    mapColumnNames.add("k2");
    mapColumnNames.add("v");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    // select count(v) from name group by rollup (k1,k2);
    Pair<GroupByDesc, VectorGroupByDesc> pair = buildKeyGroupByDesc(ctx, "count", "v", TypeInfoFactory.longTypeInfo, new String[] { "k1", "k2" }, new TypeInfo[] { TypeInfoFactory.longTypeInfo, TypeInfoFactory.longTypeInfo });
    GroupByDesc desc = pair.left;
    VectorGroupByDesc vectorDesc = pair.right;
    desc.setGroupingSetsPresent(true);
    ArrayList<Long> groupingSets = new ArrayList<>();
    // groupingSets
    groupingSets.add(0L);
    groupingSets.add(1L);
    groupingSets.add(2L);
    desc.setListGroupingSets(groupingSets);
    // add grouping sets dummy key
    ExprNodeDesc groupingSetDummyKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
    // This only works because buildKeyGroupByDesc uses an ArrayList;
    // don't do this in the actual compiler.
    desc.getKeys().add(groupingSetDummyKey);
    // groupingSet Position
    desc.setGroupingSetPosition(2);
    CompilationOpContext cCtx = new CompilationOpContext();
    desc.setMinReductionHashAggr(0.5f);
    // Set very low check-interval settings so flush checks happen early
    hconf.set("hive.groupby.mapaggr.checkinterval", "10");
    hconf.set("hive.vectorized.groupby.checkinterval", "10");
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    // Get the processing mode
    VectorGroupByOperator.ProcessingModeHashAggregate processingMode = (VectorGroupByOperator.ProcessingModeHashAggregate) vgo.processingMode;
    // No changes to the size of the hashtable due to grouping sets.
    assertEquals(1000000, processingMode.getMaxHtEntries());
    this.outputRowCount = 0;
    out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            ++outputRowCount;
        }
    });
    FakeVectorRowBatchFromObjectIterables data = getDataForRollup();
    long countRowsProduced = 0;
    long numElementsToBeRetained = 0;
    int avgAccess = 0;
    for (VectorizedRowBatch unit : data) {
        countRowsProduced += unit.size;
        vgo.process(unit, 0);
        if (countRowsProduced >= 100) {
            // note down avg access
            avgAccess = processingMode.computeAvgAccess();
            numElementsToBeRetained = getElementsHigherThan(processingMode.mapKeysAggregationBuffers, avgAccess);
            // trigger flush explicitly on next iteration
            processingMode.gcCanary.clear();
            break;
        }
    }
    // Processing the next batch should trigger a flush (the GC canary was cleared above)
    for (VectorizedRowBatch unit : data) {
        long zeroAccessBeforeFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
        vgo.process(unit, 0);
        long freqElementsAfterFlush = getElementsHigherThan(processingMode.mapKeysAggregationBuffers, avgAccess);
        assertTrue("After flush: " + freqElementsAfterFlush + ", before flush: " + numElementsToBeRetained, (freqElementsAfterFlush >= numElementsToBeRetained));
        // ensure that access counts of frequent elements are reset, giving other keys a chance
        long zeroAccessAfterFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
        assertTrue("After flush: " + zeroAccessAfterFlush + ", before flush: " + zeroAccessBeforeFlush, (zeroAccessAfterFlush > zeroAccessBeforeFlush));
        break;
    }
    vgo.close(false);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), ArrayList(java.util.ArrayList), FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc), ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext), FakeCaptureVectorToRowOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator), Test(org.junit.Test)
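
A minimal standalone sketch (hypothetical, not part of the Hive test above) of the ExprNodeConstantDesc usage at the core of this example: the grouping-set ID key is just a long-typed constant expression whose type and value can be inspected directly. The class and method names below are illustrative.

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GroupingSetKeySketch {
    public static void main(String[] args) {
        // Same construction as in the test: a constant key holding the grouping-set ID.
        // The test registers grouping sets 0, 1 and 2 and places this key at position 2.
        ExprNodeConstantDesc groupingSetDummyKey =
                new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
        System.out.println(groupingSetDummyKey.getTypeString()); // expected: bigint
        System.out.println(groupingSetDummyKey.getValue());      // expected: 0
    }
}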

Example 77 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestHiveMetaStoreClientApiArgumentsChecker, method testGetPartitionsByExpr.

@Test
public void testGetPartitionsByExpr() throws HiveException, TException {
    List<Partition> partitions = new ArrayList<>();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    hive.getPartitionsByExpr(t, node, hive.getConf(), partitions);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ArrayList(java.util.ArrayList), ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan), Test(org.junit.Test)
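
The predicate built in this test corresponds to rid >= 'f'. Below is a minimal, hypothetical standalone sketch of the same construction; the class name and printed output are illustrative, and unlike the test (which passes stringTypeInfo as the function's result type for its argument-checking purpose) the sketch uses booleanTypeInfo, the natural result type of a comparison.

import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import com.google.common.collect.Lists;

public class PartitionPredicateSketch {
    public static void main(String[] args) {
        // rid >= 'f' as an expression tree: a column child, a constant child, and a comparison UDF on top.
        ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
        ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
        List<ExprNodeDesc> children = Lists.newArrayList(column, constant);
        ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(
                TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
        System.out.println(node.getExprString()); // something like (rid >= 'f')
    }
}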

Example 78 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestColumnPrunerProcCtx, method testGetSelectNestedColPathsFromChildren5.

// Test select named_struct from named_struct:struct<a:boolean,b:double>
@Test
public void testGetSelectNestedColPathsFromChildren5() {
    ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null);
    ExprNodeConstantDesc constADesc = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, "a");
    ExprNodeConstantDesc constBDesc = new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, "b");
    List<ExprNodeDesc> list = new ArrayList<>();
    list.add(constADesc);
    list.add(constBDesc);
    GenericUDF udf = mock(GenericUDF.class);
    ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(col1Type, udf, "named_struct", list);
    ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, funcDesc, "foo", false);
    final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0"));
    SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths);
    List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths);
    // Returns an empty result since only constant descriptors are involved
    assertEquals(0, groups.size());
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc), ArrayList(java.util.ArrayList), ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), Test(org.junit.Test)
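
The snippet references col1Type, a field of the test class that is not shown here; according to the test's comment it is the struct type struct<a:boolean,b:double>. A hypothetical sketch of constructing such a TypeInfo follows (the class name is illustrative, and the real test may build it differently):

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class StructTypeSketch {
    public static void main(String[] args) {
        // struct<a:boolean,b:double>, matching the type described in the test comment.
        TypeInfo col1Type = TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("a", "b"),
                Arrays.<TypeInfo>asList(TypeInfoFactory.booleanTypeInfo, TypeInfoFactory.doubleTypeInfo));
        System.out.println(col1Type.getTypeName()); // expected: struct<a:boolean,b:double>
    }
}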

Example 79 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestHive, method checkPartitionsConsistency.

private void checkPartitionsConsistency(Table tbl) throws Exception {
    Set<Partition> allParts = hm.getAllPartitionsOf(tbl);
    List<Partition> allParts2 = hm.getPartitions(tbl);
    assertEquals("inconsistent results: getAllPartitionsOf/getPartitions", allParts, new HashSet<>(allParts2));
    Partition singlePart = allParts2.get(0);
    Partition singlePart2 = hm.getPartition(tbl, singlePart.getSpec(), false);
    assertEquals("inconsistent results: getPartition", singlePart, singlePart2);
    List<ExprNodeDesc> exprs = Lists.newArrayList(new ExprNodeConstantDesc(true), new ExprNodeConstantDesc(true));
    ExprNodeGenericFuncDesc trueExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), "and", exprs);
    List<Partition> allParts3 = new ArrayList<Partition>();
    hm.getPartitionsByExpr(tbl, trueExpr, hm.getConf(), allParts3);
    assertEquals("inconsistent results: getPartitionsByExpr", allParts2, allParts3);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ArrayList(java.util.ArrayList), ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)
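
A small aside on the single-argument ExprNodeConstantDesc constructor used here: it derives the constant's type from the Java value, so new ExprNodeConstantDesc(true) becomes a boolean constant. A minimal, hypothetical standalone sketch of the same always-true expression (class name and printed output are illustrative):

import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import com.google.common.collect.Lists;

public class TrueExprSketch {
    public static void main(String[] args) {
        // true AND true, built the same way as in checkPartitionsConsistency above.
        List<ExprNodeDesc> exprs = Lists.newArrayList(
                new ExprNodeConstantDesc(true), new ExprNodeConstantDesc(true));
        ExprNodeGenericFuncDesc trueExpr = new ExprNodeGenericFuncDesc(
                TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), "and", exprs);
        System.out.println(exprs.get(0).getTypeString()); // should print: boolean
        System.out.println(trueExpr.getExprString());     // something like (true and true)
    }
}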

Example 80 with ExprNodeConstantDesc

Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.

From the class TestSharedWorkOptimizer, method getFilterOp.

private Operator<? extends OperatorDesc> getFilterOp(int constVal) {
    ExprNodeDesc pred = new ExprNodeConstantDesc(constVal);
    FilterDesc fd = new FilterDesc(pred, true);
    Operator<? extends OperatorDesc> op = OperatorFactory.get(cCtx, fd);
    return op;
}
Also used : FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc), ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
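
A hypothetical standalone usage sketch of this helper pattern, with a locally created CompilationOpContext standing in for the test's shared cCtx field (class name and expected output are illustrative):

import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

public class FilterOpSketch {
    public static void main(String[] args) {
        CompilationOpContext cCtx = new CompilationOpContext();
        // Integer constant used as the filter predicate, as in getFilterOp(constVal) above.
        ExprNodeDesc pred = new ExprNodeConstantDesc(42);
        FilterDesc fd = new FilterDesc(pred, true);
        Operator<? extends OperatorDesc> op = OperatorFactory.get(cCtx, fd);
        System.out.println(op.getName()); // expected: FIL
    }
}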

Aggregations

ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 208
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 178
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 134
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 123
ArrayList (java.util.ArrayList): 97
Test (org.junit.Test): 71
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 46
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 39
HashMap (java.util.HashMap): 32
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 30
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 27
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 27
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 27
List (java.util.List): 23
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 22
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 21
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 21
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan): 20
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 20
DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 19