Use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.
The class TestVectorGroupByOperator, method testAggregateLongIterable.
public void testAggregateLongIterable(String aggregateName,
    Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("A");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  GroupByDesc desc = buildGroupByDescType(ctx, aggregateName,
      GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo);
  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
  FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
  // Initialize with the test Hive configuration; the sibling tests on this page
  // pass hconf here, and a null Configuration would fail during operator setup.
  vgo.initialize(hconf, null);
  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);
  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(1, outBatchList.size());
  Object result = outBatchList.get(0);
  Validator validator = getValidator(aggregateName);
  validator.validate("_total", expected, result);
}
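For illustration, a minimal sketch of how this helper might be driven. FakeVectorRowBatchFromLongIterables is one of Hive's test batch sources; the batch size, input values, and expected result below are illustrative assumptions, not code taken from the project's tests.

@Test
public void testCountLongSimple() throws HiveException {
  // Hypothetical driver: three long values in one batch of 3, expecting count == 3.
  // The exact Object type the "count" validator expects may differ.
  testAggregateLongIterable("count",
      new FakeVectorRowBatchFromLongIterables(3, Arrays.asList(1L, 2L, 3L)),
      3L);
}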
Use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.
The class TestVectorGroupByOperator, method testAggregateLongKeyIterable.
public void testAggregateLongKeyIterable(String aggregateName,
    Iterable<VectorizedRowBatch> data, HashMap<Object, Object> expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("Key");
  mapColumnNames.add("Value");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  Set<Object> keys = new HashSet<Object>();
  GroupByDesc desc = buildKeyGroupByDesc(ctx, aggregateName, "Value",
      TypeInfoFactory.longTypeInfo, "Key", TypeInfoFactory.longTypeInfo);
  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
  FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);
  out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {

    private String aggregateName;
    private HashMap<Object, Object> expected;
    private Set<Object> keys;

    @Override
    public void inspectRow(Object row, int tag) throws HiveException {
      assertTrue(row instanceof Object[]);
      Object[] fields = (Object[]) row;
      assertEquals(2, fields.length);
      Object key = fields[0];
      Long keyValue = null;
      if (null != key) {
        assertTrue(key instanceof LongWritable);
        LongWritable lwKey = (LongWritable) key;
        keyValue = lwKey.get();
      }
      assertTrue(expected.containsKey(keyValue));
      String keyAsString = String.format("%s", key);
      Object expectedValue = expected.get(keyValue);
      Object value = fields[1];
      Validator validator = getValidator(aggregateName);
      validator.validate(keyAsString, expectedValue, new Object[] { value });
      keys.add(keyValue);
    }

    private FakeCaptureOutputOperator.OutputInspector init(String aggregateName,
        HashMap<Object, Object> expected, Set<Object> keys) {
      this.aggregateName = aggregateName;
      this.expected = expected;
      this.keys = keys;
      return this;
    }
  }.init(aggregateName, expected, keys));
  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);
  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(expected.size(), outBatchList.size());
  assertEquals(expected.size(), keys.size());
}
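A hedged usage sketch: the helper groups "Value" by "Key", so the expected map pairs each key with its aggregate. FakeVectorRowBatchFromLongIterables and the numbers below are illustrative assumptions about Hive's test utilities, not code from the project.

@Test
public void testSumLongKeyed() throws HiveException {
  // Hypothetical data: keys {1, 1, 2, 2} with values {2, 3, 3, 4},
  // so sum(Value) per key is 1 -> 5 and 2 -> 7.
  HashMap<Object, Object> expected = new HashMap<Object, Object>();
  expected.put(1L, 5L);
  expected.put(2L, 7L);
  testAggregateLongKeyIterable("sum",
      new FakeVectorRowBatchFromLongIterables(4,
          Arrays.asList(1L, 1L, 2L, 2L),   // keys
          Arrays.asList(2L, 3L, 3L, 4L)),  // values
      expected);
}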
Use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.
The class TestVectorGroupByOperator, method testMemoryPressureFlush.
@Test
public void testMemoryPressureFlush() throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("Key");
  mapColumnNames.add("Value");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  GroupByDesc desc = buildKeyGroupByDesc(ctx, "max", "Value",
      TypeInfoFactory.longTypeInfo, "Key", TypeInfoFactory.longTypeInfo);
  // Set the memory threshold so that we get ~100 KB before we need to flush.
  MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
  long maxMemory = memoryMXBean.getHeapMemoryUsage().getMax();
  float threshold = 100.0f * 1024.0f / maxMemory;
  desc.setMemoryThreshold(threshold);
  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
  FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);
  this.outputRowCount = 0;
  out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {
    @Override
    public void inspectRow(Object row, int tag) throws HiveException {
      ++outputRowCount;
    }
  });
  // An endless source of distinct long values, used for both keys and values.
  Iterable<Object> it = new Iterable<Object>() {
    @Override
    public Iterator<Object> iterator() {
      return new Iterator<Object>() {
        long value = 0;

        @Override
        public boolean hasNext() {
          return true;
        }

        @Override
        public Object next() {
          return ++value;
        }

        @Override
        public void remove() {
        }
      };
    }
  };
  FakeVectorRowBatchFromObjectIterables data = new FakeVectorRowBatchFromObjectIterables(
      100, new String[] { "long", "long" }, it, it);
  // The 'it' data source will produce data without ever ending.
  // We want to see that memory pressure kicks in and some
  // entries in the VGBY are flushed.
  long countRowsProduced = 0;
  for (VectorizedRowBatch unit : data) {
    countRowsProduced += 100;
    vgo.process(unit, 0);
    if (0 < outputRowCount) {
      break;
    }
    // Set an upper bound on how much we're willing to push before it should flush.
    // We've set the memory threshold at ~100 KB and each key is distinct,
    // so it should not go beyond 100K / 16 bytes (key + data) distinct entries.
    assertTrue(countRowsProduced < 100 * 1024 / 16);
  }
  assertTrue(0 < outputRowCount);
}
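The threshold arithmetic deserves a note: GroupByDesc.setMemoryThreshold takes a fraction of the maximum heap, so the test converts an absolute budget (100 KB) into that fraction. A small sketch of the same computation, assuming a 1 GiB heap purely for concrete numbers (the test reads the real value from MemoryMXBean at runtime):

// Assumed heap size for illustration only.
long maxMemory = 1024L * 1024L * 1024L;          // 1 GiB
float threshold = 100.0f * 1024.0f / maxMemory;  // ~9.5e-5, i.e. ~100 KB of heap
// At roughly 16 bytes per distinct key (key + aggregation state), the operator
// should flush before about 100 * 1024 / 16 = 6400 distinct keys accumulate,
// which is exactly the bound the assertTrue above checks.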
Use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.
The class TestVectorGroupByOperator, method testAggregateDecimalIterable.
public void testAggregateDecimalIterable(String aggregateName,
    Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("A");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  GroupByDesc desc = buildGroupByDescType(ctx, aggregateName,
      GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4));
  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
  FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);
  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);
  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(1, outBatchList.size());
  Object result = outBatchList.get(0);
  Validator validator = getValidator(aggregateName);
  validator.validate("_total", expected, result);
}
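A corresponding hedged driver sketch for the decimal path. FakeVectorRowBatchFromObjectIterables appears in testMemoryPressureFlush above with a (batchSize, type strings, iterables) constructor; the "decimal(30,4)" type string, the HiveDecimal inputs, and the expected value here are illustrative assumptions.

@Test
public void testSumDecimalSimple() throws HiveException {
  // Hypothetical: two decimal values summed into a single "_total" row.
  Iterable<Object> values = Arrays.<Object>asList(
      HiveDecimal.create("1.25"),
      HiveDecimal.create("2.75"));
  testAggregateDecimalIterable("sum",
      new FakeVectorRowBatchFromObjectIterables(2,
          new String[] { "decimal(30,4)" }, values),
      HiveDecimal.create("4"));
}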
Use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.
The class TestVectorGroupByOperator, method buildGroupByDescCountStar.
private static GroupByDesc buildGroupByDescCountStar(VectorizationContext ctx) {
  AggregationDesc agg = buildAggregationDescCountStar(ctx);
  ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
  aggs.add(agg);
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  outputColumnNames.add("_col0");
  GroupByDesc desc = new GroupByDesc();
  desc.setVectorDesc(new VectorGroupByDesc());
  desc.setOutputColumnNames(outputColumnNames);
  desc.setAggregators(aggs);
  return desc;
}
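The buildAggregationDescCountStar helper is not shown on this page. A minimal sketch of what such a helper could look like, under the assumption that a count(*) aggregation carries no parameter expressions; the actual Hive test may set the mode and flags differently.

private static AggregationDesc buildAggregationDescCountStar(VectorizationContext ctx) {
  // ctx is unused in this sketch; it is kept to mirror the call site above.
  AggregationDesc agg = new AggregationDesc();
  agg.setGenericUDAFName("count");
  // count(*) takes no arguments, hence the empty parameter list.
  agg.setParameters(new ArrayList<ExprNodeDesc>());
  agg.setDistinct(false);
  agg.setMode(GenericUDAFEvaluator.Mode.PARTIAL1);
  return agg;
}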