Search in sources :

Example 1 with FakeVectorRowBatchFromObjectIterables

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.

the class TestVectorGroupByOperator method testMemoryPressureFlush.

/**
 * Checks that the vectorized group-by operator starts emitting rows once its memory
 * threshold is exceeded.
 *
 * <p>The memory threshold is set to the fraction of the maximum heap that corresponds to
 * 100 KB. The operator is then fed an endless stream of batches in which every key is
 * distinct. The loop stops as soon as the capturing child operator has seen at least one
 * row, and fails if the row budget derived from the 100 KB limit is reached first.
 *
 * @throws HiveException if the operator cannot be initialized or fails while processing
 */
@Test
public void testMemoryPressureFlush() throws HiveException {
    // Rows per generated batch; reused below when counting how many rows were pushed.
    final int batchSize = 100;
    // Upper bound on rows pushed before a flush must have happened:
    // 100 KB threshold divided by an estimated 16 bytes per entry (key + data).
    final long maxRowsBeforeFlush = 100 * 1024 / 16;

    List<String> mapColumnNames = new ArrayList<>();
    mapColumnNames.add("Key");
    mapColumnNames.add("Value");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    Pair<GroupByDesc, VectorGroupByDesc> pair = buildKeyGroupByDesc(ctx, "max", "Value", TypeInfoFactory.longTypeInfo, new String[] { "Key" }, new TypeInfo[] { TypeInfoFactory.longTypeInfo });
    GroupByDesc desc = pair.left;
    VectorGroupByDesc vectorDesc = pair.right;

    // Set the memory threshold so that we get 100 KB before we need to flush.
    // The threshold is expressed as a fraction of the maximum heap size.
    MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
    long maxMemory = memoryMXBean.getHeapMemoryUsage().getMax();
    float threshold = 100.0f * 1024.0f / maxMemory;
    desc.setMemoryThreshold(threshold);

    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);

    // The operator must report the same maximum heap size as the JVM does.
    // JUnit's argument order is (expected, actual): the JVM value is the expectation.
    long operatorMaxMemory = vgo.getMaxMemory();
    assertEquals(maxMemory, operatorMaxMemory);

    // Count every row that reaches the capturing child operator.
    this.outputRowCount = 0;
    out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            ++outputRowCount;
        }
    });

    // Endless source of increasing longs. Each call to iterator() starts a fresh counter
    // at 1, so using the same Iterable for both columns yields identical, distinct values.
    Iterable<Object> it = new Iterable<Object>() {

        @Override
        public Iterator<Object> iterator() {
            return new Iterator<Object>() {

                long value = 0;

                @Override
                public boolean hasNext() {
                    return true;
                }

                @Override
                public Object next() {
                    return ++value;
                }

                @Override
                public void remove() {
                    // Intentionally a no-op: removal has no meaning for a generated sequence.
                }
            };
        }
    };
    FakeVectorRowBatchFromObjectIterables data = new FakeVectorRowBatchFromObjectIterables(batchSize, new String[] { "long", "long" }, it, it);

    // The 'it' data source will produce data w/o ever ending.
    // We want to see that memory pressure kicks in and some
    // entries in the VGBY are flushed.
    long countRowsProduced = 0;
    for (VectorizedRowBatch unit : data) {
        countRowsProduced += batchSize;
        vgo.process(unit, 0);
        if (0 < outputRowCount) {
            break;
        }
        // Set an upper bound on how much we're willing to push before it should flush:
        // we've set the memory threshold at 100 KB and each key is distinct, so it
        // should not go beyond 100k/16 (key+data) rows.
        assertTrue(countRowsProduced < maxRowsBeforeFlush);
    }
    assertTrue(0 < outputRowCount);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) MemoryMXBean(java.lang.management.MemoryMXBean) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) Iterator(java.util.Iterator) FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables) FakeCaptureVectorToRowOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) Test(org.junit.Test)

Example 2 with FakeVectorRowBatchFromObjectIterables

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.

the class TestVectorGroupByOperator method testAggregateString.

/**
 * Runs a string aggregation check over a single column of values.
 *
 * <p>Wraps {@code values} in a fake batch source declared with one {@code "string"} column
 * and hands it to {@code testAggregateStringIterable}.
 *
 * @param aggregateName name of the aggregate function under test
 * @param batchSize     batch size passed to the fake batch source
 * @param values        column values to aggregate
 * @param expected      expected aggregation result
 * @throws HiveException if the delegated test helper fails
 */
public void testAggregateString(String aggregateName, int batchSize, Iterable<Object> values, Object expected) throws HiveException {
    final String[] columnTypes = { "string" };
    @SuppressWarnings("unchecked")
    FakeVectorRowBatchFromObjectIterables batchSource =
        new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, values);
    testAggregateStringIterable(aggregateName, batchSource, expected);
}
Also used : FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables)

Example 3 with FakeVectorRowBatchFromObjectIterables

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.

the class TestVectorGroupByOperator method testAggregateDecimal.

/**
 * Runs a decimal aggregation check over a single column of values.
 *
 * <p>Wraps {@code values} in a fake batch source whose only column is declared with
 * {@code typeName} and hands it to {@code testAggregateDecimalIterable}.
 *
 * @param typeName      type name of the single input column
 * @param aggregateName name of the aggregate function under test
 * @param batchSize     batch size passed to the fake batch source
 * @param values        column values to aggregate
 * @param expected      expected aggregation result
 * @throws HiveException if the delegated test helper fails
 */
public void testAggregateDecimal(String typeName, String aggregateName, int batchSize, Iterable<Object> values, Object expected) throws HiveException {
    final String[] columnTypes = { typeName };
    @SuppressWarnings("unchecked")
    FakeVectorRowBatchFromObjectIterables batchSource =
        new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, values);
    testAggregateDecimalIterable(aggregateName, batchSource, expected);
}
Also used : FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables)

Example 4 with FakeVectorRowBatchFromObjectIterables

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.

the class TestVectorGroupByOperator method testRollupAggregationWithBufferReuse.

/**
 * Counts how many aggregation buffers a hash-mode group-by with grouping sets allocates
 * when flushes are forced after every batch.
 *
 * <p>The first aggregator is replaced by a Mockito spy, rollup input is pushed through the
 * operator until at least 1000 rows were fed, and the number of
 * {@code getNewAggregationBuffer()} calls is verified afterwards.
 *
 * @throws HiveException if the operator cannot be initialized or fails while processing
 */
@Test
public void testRollupAggregationWithBufferReuse() throws HiveException {
    List<String> columnNames = new ArrayList<>();
    for (String column : new String[] { "k1", "k2", "v" }) {
        columnNames.add(column);
    }
    VectorizationContext vContext = new VectorizationContext("name", columnNames);

    // Models: select count(v) from name group by rollup (k1,k2);
    String[] keyColumns = { "k1", "k2" };
    TypeInfo[] keyTypes = { TypeInfoFactory.longTypeInfo, TypeInfoFactory.longTypeInfo };
    Pair<GroupByDesc, VectorGroupByDesc> descPair =
        buildKeyGroupByDesc(vContext, "count", "v", TypeInfoFactory.longTypeInfo, keyColumns, keyTypes);
    GroupByDesc desc = descPair.left;
    VectorGroupByDesc vectorDesc = descPair.right;

    // Grouping sets 0, 1 and 2.
    desc.setGroupingSetsPresent(true);
    ArrayList<Long> groupingSets = new ArrayList<>();
    for (long groupingSetId = 0L; groupingSetId <= 2L; groupingSetId++) {
        groupingSets.add(groupingSetId);
    }
    desc.setListGroupingSets(groupingSets);

    // Append the dummy grouping-set key and record its position among the keys.
    desc.getKeys().add(new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L));
    desc.setGroupingSetPosition(2);
    desc.setMinReductionHashAggr(0.5f);

    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo =
        (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, vContext, vectorDesc);
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);

    // Grab the hash-aggregate processing mode so its GC canary can be cleared below.
    VectorGroupByOperator.ProcessingModeHashAggregate hashMode =
        (VectorGroupByOperator.ProcessingModeHashAggregate) vgo.processingMode;

    // Swap in a spy so buffer allocations can be counted.
    VectorAggregateExpression aggregatorSpy = spy(vgo.aggregators[0]);
    vgo.aggregators[0] = aggregatorSpy;

    FakeVectorRowBatchFromObjectIterables rollupInput = getDataForRollup();
    long rowsFed = 0;
    for (VectorizedRowBatch batch : rollupInput) {
        rowsFed += batch.size;
        vgo.process(batch, 0);
        // Trigger flush frequently to simulate the operator working on many batches.
        hashMode.gcCanary.clear();
        boolean enoughInput = rowsFed >= 1000;
        if (enoughInput) {
            break;
        }
    }
    vgo.close(false);

    // The exact number of allocations depends on the input; for this input it is 13.
    // Without buffer reuse, 512 buffers would be allocated for the same input.
    verify(aggregatorSpy, times(13)).getNewAggregationBuffer();
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables) FakeCaptureVectorToRowOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) Test(org.junit.Test)

Example 5 with FakeVectorRowBatchFromObjectIterables

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.

the class TestVectorGroupByOperator method testMultiKeyDoubleShortString.

/**
 * Runs the multi-key {@code "sum"} check on four columns typed double, smallint, string
 * and double, delivered in batches of size 2.
 *
 * <p>Judging by the expected map, the first three columns act as the grouping key and the
 * fourth is the summed value (for example key (1.0, 2, "A") covers rows 2.0 and 4.0 and
 * expects 6.0); the exact contract lives in {@code testMultiKey}. The input contains null
 * key components.
 *
 * @throws HiveException if the delegated test helper fails
 */
@Test
public void testMultiKeyDoubleShortString() throws HiveException {
    short smallintKey = 2;

    // One Iterable per input column, in column order.
    Iterable<Object> doubleKeys = Arrays.asList(new Object[] { null, 1.0, 1.0, null, 2.0, 2.0, null });
    Iterable<Object> smallintKeys = Arrays.asList(
        new Object[] { null, smallintKey, smallintKey, null, smallintKey, smallintKey, null });
    Iterable<Object> stringKeys = Arrays.asList(new Object[] { "A", "A", "A", "C", null, null, "A" });
    Iterable<Object> summedValues = Arrays.asList(new Object[] { 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0 });

    FakeVectorRowBatchFromObjectIterables input = new FakeVectorRowBatchFromObjectIterables(
        2,
        new String[] { "double", "smallint", "string", "double" },
        doubleKeys,
        smallintKeys,
        stringKeys,
        summedValues);

    // Expected result: alternating key list and sum.
    testMultiKey(
        "sum",
        input,
        buildHashMap(
            Arrays.asList(1.0, smallintKey, "A"), 6.0,
            Arrays.asList(null, null, "C"), 8.0,
            Arrays.asList(2.0, smallintKey, null), 48.0,
            Arrays.asList(null, null, "A"), 65.0));
}
Also used : FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables) Test(org.junit.Test)

Aggregations

FakeVectorRowBatchFromObjectIterables (org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables): 13
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 7
Test (org.junit.Test): 7
ArrayList (java.util.ArrayList): 6
FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator): 6
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 6
VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc): 6
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 5
Iterator (java.util.Iterator): 3
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 3
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 3
MemoryMXBean (java.lang.management.MemoryMXBean): 2
Configuration (org.apache.hadoop.conf.Configuration): 1
VectorAggregateExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression): 1
LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc): 1
VectorLimitDesc (org.apache.hadoop.hive.ql.plan.VectorLimitDesc): 1
ObjectRegistryImpl (org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl): 1