use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.
the class TestVectorGroupByOperator method testMemoryPressureFlush.
/**
 * Checks that a hash-mode {@code max(Value) group by Key} operator starts emitting rows
 * (i.e. flushes) once its memory threshold is exceeded.
 *
 * <p>The threshold is configured as the fraction of the maximum heap that corresponds to
 * 100KB. The operator is then fed an endless stream of distinct keys; the test fails if
 * no output row is observed before an upper bound of rows has been pushed.
 *
 * @throws HiveException if operator construction, initialization or processing fails
 */
@Test
public void testMemoryPressureFlush() throws HiveException {
  // Single source of truth for the batch size: used both to build the batches and to
  // account for how many rows have been pushed into the operator.
  final int batchSize = 100;

  List<String> mapColumnNames = new ArrayList<>();
  mapColumnNames.add("Key");
  mapColumnNames.add("Value");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  Pair<GroupByDesc, VectorGroupByDesc> pair = buildKeyGroupByDesc(ctx, "max", "Value", TypeInfoFactory.longTypeInfo, new String[] { "Key" }, new TypeInfo[] { TypeInfoFactory.longTypeInfo });
  GroupByDesc desc = pair.left;
  VectorGroupByDesc vectorDesc = pair.right;

  // Set the memory threshold so that we get 100Kb before we need to flush.
  MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
  long maxMemory = memoryMXBean.getHeapMemoryUsage().getMax();
  float threshold = 100.0f * 1024.0f / maxMemory;
  desc.setMemoryThreshold(threshold);

  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
  FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);

  // The JVM-reported max heap is the reference value; the operator's view is the value
  // under test, so it goes in the "actual" position.
  long actualMaxMemory = vgo.getMaxMemory();
  assertEquals(maxMemory, actualMaxMemory);

  // Count every row the operator forwards to its child.
  this.outputRowCount = 0;
  out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {
    @Override
    public void inspectRow(Object row, int tag) throws HiveException {
      ++outputRowCount;
    }
  });

  // Endless source of strictly increasing longs (1, 2, 3, ...). Each call to iterator()
  // starts a fresh sequence, so the same Iterable can back both columns.
  Iterable<Object> it = new Iterable<Object>() {
    @Override
    public Iterator<Object> iterator() {
      return new Iterator<Object>() {
        long value = 0;

        @Override
        public boolean hasNext() {
          return true;
        }

        @Override
        public Object next() {
          return ++value;
        }

        @Override
        public void remove() {
          // Intentionally a no-op: the generated sequence has nothing to remove.
        }
      };
    }
  };
  FakeVectorRowBatchFromObjectIterables data = new FakeVectorRowBatchFromObjectIterables(batchSize, new String[] { "long", "long" }, it, it);

  // The 'it' data source will produce data w/o ever ending
  // We want to see that memory pressure kicks in and some
  // entries in the VGBY are flushed.
  long countRowsProduced = 0;
  for (VectorizedRowBatch unit : data) {
    countRowsProduced += batchSize;
    vgo.process(unit, 0);
    if (0 < outputRowCount) {
      break;
    }
    // Set an upper bound how much we're willing to push before it should flush
    // we've set the memory threshold at 100kb, each key is distinct
    // It should not go beyond 100k/16 (key+data)
    assertTrue("pushed " + countRowsProduced + " rows without observing a flush",
        countRowsProduced < 100 * 1024 / 16);
  }
  assertTrue("expected at least one flushed output row", 0 < outputRowCount);
}
use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.
the class TestVectorGroupByOperator method testAggregateString.
/**
 * Runs a string aggregation check over a single "string" column.
 *
 * <p>Wraps {@code values} into batches of {@code batchSize} rows and delegates to
 * {@code testAggregateStringIterable}.
 *
 * @param aggregateName name of the aggregate to evaluate
 * @param batchSize number of rows per generated batch
 * @param values input values for the single string column
 * @param expected expected aggregation result, passed through to the delegate
 * @throws HiveException propagated from the delegate
 */
public void testAggregateString(String aggregateName, int batchSize, Iterable<Object> values, Object expected) throws HiveException {
  final String[] columnTypes = new String[] { "string" };
  @SuppressWarnings("unchecked")
  FakeVectorRowBatchFromObjectIterables batches =
      new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, values);
  testAggregateStringIterable(aggregateName, batches, expected);
}
use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.
the class TestVectorGroupByOperator method testAggregateDecimal.
/**
 * Runs a decimal aggregation check over a single column of type {@code typeName}.
 *
 * <p>Wraps {@code values} into batches of {@code batchSize} rows and delegates to
 * {@code testAggregateDecimalIterable}.
 *
 * @param typeName type name of the single input column
 * @param aggregateName name of the aggregate to evaluate
 * @param batchSize number of rows per generated batch
 * @param values input values for the single column
 * @param expected expected aggregation result, passed through to the delegate
 * @throws HiveException propagated from the delegate
 */
public void testAggregateDecimal(String typeName, String aggregateName, int batchSize, Iterable<Object> values, Object expected) throws HiveException {
  final String[] columnTypes = new String[] { typeName };
  @SuppressWarnings("unchecked")
  FakeVectorRowBatchFromObjectIterables batches =
      new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, values);
  testAggregateDecimalIterable(aggregateName, batches, expected);
}
use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.
the class TestVectorGroupByOperator method testRollupAggregationWithBufferReuse.
/**
 * Exercises {@code select count(v) from name group by rollup (k1,k2)} in hash-aggregate
 * mode and verifies, through a Mockito spy on the first aggregator, how many times
 * {@code getNewAggregationBuffer()} is invoked for the rollup input.
 *
 * @throws HiveException if operator construction, initialization or processing fails
 */
@Test
public void testRollupAggregationWithBufferReuse() throws HiveException {
  List<String> columnNames = new ArrayList<String>();
  columnNames.add("k1");
  columnNames.add("k2");
  columnNames.add("v");
  VectorizationContext vContext = new VectorizationContext("name", columnNames);

  // select count(v) from name group by rollup (k1,k2);
  String[] keyColumns = new String[] { "k1", "k2" };
  TypeInfo[] keyTypes = new TypeInfo[] { TypeInfoFactory.longTypeInfo, TypeInfoFactory.longTypeInfo };
  Pair<GroupByDesc, VectorGroupByDesc> descs =
      buildKeyGroupByDesc(vContext, "count", "v", TypeInfoFactory.longTypeInfo, keyColumns, keyTypes);
  GroupByDesc desc = descs.left;
  VectorGroupByDesc vectorDesc = descs.right;

  // Grouping sets 0, 1 and 2.
  desc.setGroupingSetsPresent(true);
  ArrayList<Long> groupingSets = new ArrayList<>();
  for (long setId = 0L; setId <= 2L; setId++) {
    groupingSets.add(setId);
  }
  desc.setListGroupingSets(groupingSets);

  // Append the grouping-set dummy key and record its position among the keys.
  ExprNodeDesc dummyKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
  desc.getKeys().add(dummyKey);
  desc.setGroupingSetPosition(2);

  CompilationOpContext compilationCtx = new CompilationOpContext();
  desc.setMinReductionHashAggr(0.5f);
  Operator<? extends OperatorDesc> rowModeGroupBy = OperatorFactory.get(compilationCtx, desc);
  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(rowModeGroupBy, vContext, vectorDesc);
  // Attach the capturing child operator; the returned handle is not needed here.
  FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(compilationCtx, vgo);
  vgo.initialize(hconf, null);

  // Get the processing mode
  VectorGroupByOperator.ProcessingModeHashAggregate hashMode =
      (VectorGroupByOperator.ProcessingModeHashAggregate) vgo.processingMode;

  // Swap in a spy so buffer allocations can be counted.
  VectorAggregateExpression countingAggregator = spy(vgo.aggregators[0]);
  vgo.aggregators[0] = countingAggregator;

  FakeVectorRowBatchFromObjectIterables rollupInput = getDataForRollup();
  long rowsFed = 0;
  for (VectorizedRowBatch batch : rollupInput) {
    rowsFed += batch.size;
    vgo.process(batch, 0);
    // trigger flush frequently to simulate operator working on many batches
    hashMode.gcCanary.clear();
    if (rowsFed >= 1000) {
      break;
    }
  }
  vgo.close(false);

  // The exact number of allocations depend on input. In this case it is 13.
  // Without buffer reuse, we allocate 512 buffers for the same input
  verify(countingAggregator, times(13)).getNewAggregationBuffer();
}
use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.
the class TestVectorGroupByOperator method testMultiKeyDoubleShortString.
/**
 * Runs a {@code sum} aggregation through {@code testMultiKey} over four columns typed
 * (double, smallint, string, double), supplied in batches of 2 rows, and checks the
 * per-key totals listed in the expected map.
 *
 * @throws HiveException propagated from {@code testMultiKey}
 */
@Test
public void testMultiKeyDoubleShortString() throws HiveException {
  short s = 2;

  // Seven input rows, one list per column.
  List<Object> doubleColumn = Arrays.<Object>asList(null, 1.0, 1.0, null, 2.0, 2.0, null);
  List<Object> shortColumn = Arrays.<Object>asList(null, s, s, null, s, s, null);
  List<Object> stringColumn = Arrays.<Object>asList("A", "A", "A", "C", null, null, "A");
  List<Object> valueColumn = Arrays.<Object>asList(1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0);

  FakeVectorRowBatchFromObjectIterables input = new FakeVectorRowBatchFromObjectIterables(
      2,
      new String[] { "double", "smallint", "string", "double" },
      doubleColumn,
      shortColumn,
      stringColumn,
      valueColumn);

  // Expected: alternating (key tuple, sum) pairs.
  testMultiKey(
      "sum",
      input,
      buildHashMap(
          Arrays.asList(1.0, s, "A"), 6.0,
          Arrays.asList(null, null, "C"), 8.0,
          Arrays.asList(2.0, s, null), 48.0,
          Arrays.asList(null, null, "A"), 65.0));
}
Aggregations