Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
From the class TestVectorGroupByOperator, method testRollupAggregationWithFlush.
@Test
public void testRollupAggregationWithFlush() throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("k1");
  mapColumnNames.add("k2");
  mapColumnNames.add("v");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  // select count(v) from name group by rollup (k1, k2);
  Pair<GroupByDesc, VectorGroupByDesc> pair = buildKeyGroupByDesc(ctx, "count", "v",
      TypeInfoFactory.longTypeInfo, new String[] { "k1", "k2" },
      new TypeInfo[] { TypeInfoFactory.longTypeInfo, TypeInfoFactory.longTypeInfo });
  GroupByDesc desc = pair.left;
  VectorGroupByDesc vectorDesc = pair.right;
  desc.setGroupingSetsPresent(true);
  // grouping set IDs
  ArrayList<Long> groupingSets = new ArrayList<>();
  groupingSets.add(0L);
  groupingSets.add(1L);
  groupingSets.add(2L);
  desc.setListGroupingSets(groupingSets);
  // Add the grouping-sets dummy key. This only works because
  // buildKeyGroupByDesc used an ArrayList; don't do this in the actual compiler.
  ExprNodeDesc groupingSetDummyKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
  desc.getKeys().add(groupingSetDummyKey);
  // grouping-set position
  desc.setGroupingSetPosition(2);
  CompilationOpContext cCtx = new CompilationOpContext();
  desc.setMinReductionHashAggr(0.5f);
  // Set really low check-interval settings.
  hconf.set("hive.groupby.mapaggr.checkinterval", "10");
  hconf.set("hive.vectorized.groupby.checkinterval", "10");
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
  FakeCaptureVectorToRowOutputOperator out =
      FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);
  // Get the processing mode.
  VectorGroupByOperator.ProcessingModeHashAggregate processingMode =
      (VectorGroupByOperator.ProcessingModeHashAggregate) vgo.processingMode;
  // No change to the size of the hash table due to grouping sets.
  assertEquals(1000000, processingMode.getMaxHtEntries());
  this.outputRowCount = 0;
  out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

    @Override
    public void inspectRow(Object row, int tag) throws HiveException {
      ++outputRowCount;
    }
  });
  FakeVectorRowBatchFromObjectIterables data = getDataForRollup();
  long countRowsProduced = 0;
  long numElementsToBeRetained = 0;
  int avgAccess = 0;
  for (VectorizedRowBatch unit : data) {
    countRowsProduced += unit.size;
    vgo.process(unit, 0);
    if (countRowsProduced >= 100) {
      // Note down the average access count.
      avgAccess = processingMode.computeAvgAccess();
      numElementsToBeRetained = getElementsHigherThan(processingMode.mapKeysAggregationBuffers, avgAccess);
      // Trigger a flush explicitly on the next iteration.
      processingMode.gcCanary.clear();
      break;
    }
  }
  // This processing should trigger the flush.
  for (VectorizedRowBatch unit : data) {
    long zeroAccessBeforeFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
    vgo.process(unit, 0);
    long freqElementsAfterFlush = getElementsHigherThan(processingMode.mapKeysAggregationBuffers, avgAccess);
    assertTrue("After flush: " + freqElementsAfterFlush + ", before flush: " + numElementsToBeRetained,
        freqElementsAfterFlush >= numElementsToBeRetained);
    // Ensure that the frequent elements' counters are reset, giving other keys a chance.
    long zeroAccessAfterFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
    assertTrue("After flush: " + zeroAccessAfterFlush + ", before flush: " + zeroAccessBeforeFlush,
        zeroAccessAfterFlush > zeroAccessBeforeFlush);
    break;
  }
  vgo.close(false);
}
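For reference, the grouping-sets dummy key above is nothing more than a long constant appended to the key expressions. A minimal, self-contained sketch of just that construction (the class name is hypothetical; the printed strings come from ExprNodeDesc's own accessors):

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GroupingSetKeySketch {
  public static void main(String[] args) {
    // The grouping-set ID travels as an extra long key column; the 0L here
    // is only a placeholder value for the initial plan.
    ExprNodeDesc dummyKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
    System.out.println(dummyKey.getTypeString() + " / " + dummyKey.getExprString());
  }
}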
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
From the class TestHiveMetaStoreClientApiArgumentsChecker, method testGetPartitionsByExpr.
@Test
public void testGetPartitionsByExpr() throws HiveException, TException {
  List<Partition> partitions = new ArrayList<>();
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPEqualOrGreaterThan(), children);
  hive.getPartitionsByExpr(t, node, hive.getConf(), partitions);
}
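The column-versus-constant comparison built above generalizes to any comparison operator. A hedged sketch of a reusable builder, using only constructors that appear in the test (the helper and class names are hypothetical; note that the test declares the function's result type as stringTypeInfo, which suffices for an argument-checking test, whereas a comparison naturally yields booleanTypeInfo):

import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

import com.google.common.collect.Lists;

public class ComparisonPredicateSketch {

  // Hypothetical helper: builds "column <op> constant" for a string column.
  static ExprNodeGenericFuncDesc stringComparison(String columnName, GenericUDF op, String value) {
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, columnName, null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, value);
    List<ExprNodeDesc> children = Lists.newArrayList(column, constant);
    // A comparison evaluates to boolean, so booleanTypeInfo is the natural
    // result type for a standalone builder.
    return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, op, children);
  }

  public static void main(String[] args) {
    ExprNodeGenericFuncDesc ge = stringComparison("rid", new GenericUDFOPEqualOrGreaterThan(), "f");
    System.out.println(ge.getExprString());
  }
}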
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
From the class TestColumnPrunerProcCtx, method testGetSelectNestedColPathsFromChildren5.
// Test select named_struct from named_struct:struct<a:boolean,b:double>
@Test
public void testGetSelectNestedColPathsFromChildren5() {
  ColumnPrunerProcCtx ctx = new ColumnPrunerProcCtx(null);
  ExprNodeConstantDesc constADesc = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, "a");
  ExprNodeConstantDesc constBDesc = new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, "b");
  List<ExprNodeDesc> list = new ArrayList<>();
  list.add(constADesc);
  list.add(constBDesc);
  GenericUDF udf = mock(GenericUDF.class);
  ExprNodeDesc funcDesc = new ExprNodeGenericFuncDesc(col1Type, udf, "named_struct", list);
  ExprNodeDesc fieldDesc = new ExprNodeFieldDesc(TypeInfoFactory.doubleTypeInfo, funcDesc, "foo", false);
  final List<FieldNode> paths = Arrays.asList(new FieldNode("_col0"));
  SelectOperator selectOperator = buildSelectOperator(Arrays.asList(fieldDesc), paths);
  List<FieldNode> groups = ctx.getSelectColsFromChildren(selectOperator, paths);
  // Returns an empty result, since only constant descriptors exist.
  assertEquals(0, groups.size());
}
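Per the test comment, col1Type is struct<a:boolean,b:double>. A minimal sketch of constructing that TypeInfo via TypeInfoFactory.getStructTypeInfo, which takes parallel field-name and field-type lists (the class name is hypothetical):

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class StructTypeSketch {
  public static void main(String[] args) {
    // struct<a:boolean,b:double>, matching the type named in the test comment.
    TypeInfo structType = TypeInfoFactory.getStructTypeInfo(
        Arrays.asList("a", "b"),
        Arrays.<TypeInfo>asList(TypeInfoFactory.booleanTypeInfo, TypeInfoFactory.doubleTypeInfo));
    System.out.println(structType.getTypeName()); // struct<a:boolean,b:double>
  }
}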
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
From the class TestHive, method checkPartitionsConsistency.
private void checkPartitionsConsistency(Table tbl) throws Exception {
  Set<Partition> allParts = hm.getAllPartitionsOf(tbl);
  List<Partition> allParts2 = hm.getPartitions(tbl);
  assertEquals("inconsistent results: getAllPartitionsOf/getPartitions", allParts, new HashSet<>(allParts2));
  Partition singlePart = allParts2.get(0);
  Partition singlePart2 = hm.getPartition(tbl, singlePart.getSpec(), false);
  assertEquals("inconsistent results: getPartition", singlePart, singlePart2);
  // An always-true predicate ("true and true") should return all partitions.
  List<ExprNodeDesc> exprs = Lists.newArrayList(new ExprNodeConstantDesc(true), new ExprNodeConstantDesc(true));
  ExprNodeGenericFuncDesc trueExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
      new GenericUDFOPAnd(), "and", exprs);
  List<Partition> allParts3 = new ArrayList<Partition>();
  hm.getPartitionsByExpr(tbl, trueExpr, hm.getConf(), allParts3);
  assertEquals("inconsistent results: getPartitionsByExpr", allParts2, allParts3);
}
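The ExprNodeConstantDesc(true) calls above use the single-argument constructor, which infers the TypeInfo from the Java value rather than taking it explicitly. A small sketch illustrating that assumption, namely that a Boolean value maps to the boolean type (class name hypothetical):

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;

public class ConstantInferenceSketch {
  public static void main(String[] args) {
    // The single-argument constructor derives the TypeInfo from the Java
    // value, so this should be equivalent to passing booleanTypeInfo explicitly.
    ExprNodeConstantDesc alwaysTrue = new ExprNodeConstantDesc(true);
    System.out.println(alwaysTrue.getTypeString()); // boolean
  }
}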
Use of org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc in project hive by apache.
From the class TestSharedWorkOptimizer, method getFilterOp.
private Operator<? extends OperatorDesc> getFilterOp(int constVal) {
  ExprNodeDesc pred = new ExprNodeConstantDesc(constVal);
  FilterDesc fd = new FilterDesc(pred, true);
  return OperatorFactory.get(cCtx, fd);
}
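Since TestSharedWorkOptimizer is ultimately about recognizing identical operator trees, what matters for these constant predicates is whether two of them compare as the same expression. A sketch of that comparison via ExprNodeDesc.isSame, under the assumption that equal constants of the same type compare equal (class name hypothetical):

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public class PredicateEqualitySketch {
  public static void main(String[] args) {
    ExprNodeDesc p1 = new ExprNodeConstantDesc(1);
    ExprNodeDesc p2 = new ExprNodeConstantDesc(1);
    ExprNodeDesc p3 = new ExprNodeConstantDesc(2);
    // Predicates built from equal constants should compare as the same
    // expression; differing constants should not.
    System.out.println(p1.isSame(p2)); // expected: true
    System.out.println(p1.isSame(p3)); // expected: false
  }
}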