Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
From the class TestOperators, method setUp:
@Before
public void setUp() {
  r = new InspectableObject[5];
  ArrayList<String> names = new ArrayList<String>(3);
  names.add("col0");
  names.add("col1");
  names.add("col2");
  ArrayList<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>(3);
  objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  for (int i = 0; i < 5; i++) {
    ArrayList<String> data = new ArrayList<String>();
    data.add("" + i);
    data.add("" + (i + 1));
    data.add("" + (i + 2));
    try {
      r[i] = new InspectableObject();
      r[i].o = data;
      r[i].oi = ObjectInspectorFactory.getStandardStructObjectInspector(names, objectInspectors);
    } catch (Throwable e) {
      throw new RuntimeException(e);
    }
  }
}
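For orientation, here is a minimal standalone sketch of what the fixture above builds: an InspectableObject simply pairs a raw row (field o) with the ObjectInspector that knows how to read it (field oi). The class name InspectableObjectSketch is hypothetical, and the Hive serde2 classes are assumed to be on the classpath.

import java.util.ArrayList;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class InspectableObjectSketch {
  public static void main(String[] args) {
    // A struct of three string columns, mirroring the setUp fixture above.
    ArrayList<String> names = new ArrayList<>(Arrays.asList("col0", "col1", "col2"));
    ArrayList<ObjectInspector> ois = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
      ois.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }
    StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);
    InspectableObject row = new InspectableObject();
    row.o = new ArrayList<>(Arrays.asList("0", "1", "2")); // the raw data
    row.oi = soi;                                          // how to interpret it
    // Read a field back through the inspector instead of casting row.o directly.
    StructField col1 = soi.getStructFieldRef("col1");
    System.out.println("col1 = " + soi.getStructFieldData(row.o, col1)); // prints: col1 = 1
  }
}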
Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
From the class TestOperators, method testHashAggr:
private void testHashAggr(GroupByOperator op, HiveConf hconf, InspectableObject[] r, int expectOutputSize) throws HiveException {
  // 1. Collect operator to observe the output of the group by operator
  CollectDesc cd = new CollectDesc(expectOutputSize + 10);
  CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, op);
  op.initialize(hconf, new ObjectInspector[] { r[0].oi });
  // 2. Evaluate on rows and check hashAggr flag
  for (int i = 0; i < r.length; i++) {
    op.process(r[i].o, 0);
  }
  op.close(false);
  InspectableObject io = new InspectableObject();
  int output = 0;
  // 3. Print group by results
  do {
    cdop.retrieve(io);
    if (io.o != null) {
      System.out.println("io.o = " + io.o);
      output++;
    }
  } while (io.o != null);
  // 4. Check partial result size
  assertEquals(expectOutputSize, output);
}
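The drain loop above relies on CollectOperator.retrieve() leaving io.o set to null once its buffer is exhausted. A hedged sketch of that contract, factored into a helper (countCollected is a hypothetical name; Hive's ql and serde2 classes are assumed on the classpath):

import org.apache.hadoop.hive.ql.exec.CollectOperator;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;

final class CollectDrain {
  // Drains a CollectOperator and returns how many rows it had buffered.
  static int countCollected(CollectOperator cdop) {
    InspectableObject io = new InspectableObject();
    int output = 0;
    while (true) {
      cdop.retrieve(io);
      if (io.o == null) {
        return output; // retrieve() signals an empty buffer via a null row
      }
      output++;
    }
  }
}

Sizing the CollectDesc at expectOutputSize + 10 presumably gives the buffer headroom, so an over-producing operator shows up as a failed assertEquals rather than a buffering problem.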
Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
From the class TestOperators, method testHashGroupBy:
@Test
public void testHashGroupBy() throws HiveException {
  InspectableObject[] input = constructHashAggrInputData(5, 3);
  System.out.println("---------------Begin to Construct Groupby Desc-------------");
  // 1. Build AggregationDesc
  String aggregate = "MAX";
  ExprNodeDesc inputColumn = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col0", "table", false);
  ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
  params.add(inputColumn);
  GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(aggregate, params, null, false, false);
  AggregationDesc agg = new AggregationDesc(aggregate, genericUDAFEvaluator, params, false, GenericUDAFEvaluator.Mode.PARTIAL1);
  ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
  aggs.add(agg);
  // 2. aggr keys
  ExprNodeDesc key1 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col1", "table", false);
  ExprNodeDesc key2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col2", "table", false);
  ArrayList<ExprNodeDesc> keys = new ArrayList<>();
  keys.add(key1);
  keys.add(key2);
  // 3. outputCols
  // @see org.apache.hadoop.hive.ql.exec.GroupByOperator.forward
  // outputColumnNames, including: group by keys, agg evaluators output cols.
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  for (int i = 0; i < keys.size() + aggs.size(); i++) {
    outputColumnNames.add("_col" + i);
  }
  // 4. build GroupByDesc desc
  GroupByDesc desc = new GroupByDesc();
  desc.setOutputColumnNames(outputColumnNames);
  desc.setAggregators(aggs);
  desc.setKeys(keys);
  desc.setMode(GroupByDesc.Mode.HASH);
  desc.setMemoryThreshold(1.0f);
  desc.setGroupByMemoryUsage(1.0f);
  // minReductionHashAggr
  desc.setMinReductionHashAggr(0.5f);
  // 5. Configure hive conf and build group by operator
  HiveConf hconf = new HiveConf();
  HiveConf.setIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL, 1);
  // 6. test hash aggr without grouping sets
  System.out.println("---------------Begin to test hash group by without grouping sets-------------");
  int withoutGroupingSetsExpectSize = 3;
  GroupByOperator op = new GroupByOperator(new CompilationOpContext());
  op.setConf(desc);
  testHashAggr(op, hconf, input, withoutGroupingSetsExpectSize);
  // 7. test hash aggr with grouping sets
  System.out.println("---------------Begin to test hash group by with grouping sets------------");
  int groupingSetsExpectSize = 6;
  desc.setGroupingSetsPresent(true);
  ArrayList<Long> groupingSets = new ArrayList<>();
  // groupingSets
  groupingSets.add(1L);
  groupingSets.add(2L);
  desc.setListGroupingSets(groupingSets);
  // add grouping sets dummy key
  ExprNodeDesc groupingSetDummyKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
  keys.add(groupingSetDummyKey);
  desc.setKeys(keys);
  // groupingSet Position
  desc.setGroupingSetPosition(2);
  op = new GroupByOperator(new CompilationOpContext());
  op.setConf(desc);
  testHashAggr(op, hconf, input, groupingSetsExpectSize);
}
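A note on the grouping-set wiring above, with a small standalone illustration. As I read GroupByOperator's convention, each id passed to setListGroupingSets is a bitmask over the key columns (bit i corresponds to key i), and the constant dummy key at groupingSetPosition carries the active set's id at runtime; consult GroupByOperator for the exact semantics. The demo class name below is hypothetical.

public class GroupingSetMaskDemo {
  public static void main(String[] args) {
    String[] keys = { "col1", "col2" };
    // Mirrors desc.setListGroupingSets(...) in the test above.
    for (long mask : new long[] { 1L, 2L }) {
      StringBuilder active = new StringBuilder();
      for (int bit = 0; bit < keys.length; bit++) {
        if ((mask & (1L << bit)) != 0) {
          active.append(keys[bit]).append(' ');
        }
      }
      System.out.println("grouping set " + mask + " -> " + active.toString().trim());
    }
    // With the two grouping sets enabled, the test's expected output grows
    // from 3 rows to 6 for this input, as asserted by testHashAggr.
  }
}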
Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
From the class ColumnStatsTask, method constructColumnStatsFromPackedRows:
private List<ColumnStatistics> constructColumnStatsFromPackedRows(Hive db) throws HiveException, MetaException, IOException {
  String currentDb = SessionState.get().getCurrentDatabase();
  String tableName = work.getColStats().getTableName();
  String partName = null;
  List<String> colName = work.getColStats().getColName();
  List<String> colType = work.getColStats().getColType();
  boolean isTblLevel = work.getColStats().isTblLevel();
  List<ColumnStatistics> stats = new ArrayList<ColumnStatistics>();
  InspectableObject packedRow;
  Table tbl = db.getTable(currentDb, tableName);
  while ((packedRow = ftOp.getNextRow()) != null) {
    if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
      throw new HiveException("Unexpected object type encountered while unpacking row");
    }
    List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
    StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> list = soi.getStructFieldsDataAsList(packedRow.o);
    List<FieldSchema> partColSchema = tbl.getPartCols();
    // Partition columns are appended at end, we only care about stats column
    int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
    for (int i = 0; i < numOfStatCols; i++) {
      // Get the field objectInspector, fieldName and the field object.
      ObjectInspector foi = fields.get(i).getFieldObjectInspector();
      Object f = (list == null ? null : list.get(i));
      String fieldName = fields.get(i).getFieldName();
      ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
      statsObj.setColName(colName.get(i));
      statsObj.setColType(colType.get(i));
      unpackStructObject(foi, f, fieldName, statsObj);
      statsObjs.add(statsObj);
    }
    if (!isTblLevel) {
      List<String> partVals = new ArrayList<String>();
      // Iterate over partition columns to figure out partition name
      for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
        Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i));
        // could be null for default partition
        partVals.add(partVal == null ? this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
      }
      partName = Warehouse.makePartName(partColSchema, partVals);
    }
    String[] names = Utilities.getDbTableName(currentDb, tableName);
    ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel);
    ColumnStatistics colStats = new ColumnStatistics();
    colStats.setStatsDesc(statsDesc);
    colStats.setStatsObj(statsObjs);
    stats.add(colStats);
  }
  ftOp.clearFetchContext();
  return stats;
}
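The partName built above comes from Warehouse.makePartName, which renders the partition schema and values in key=value/key=value form. A minimal sketch (ds and hr are hypothetical partition columns; hive-metastore is assumed on the classpath):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class MakePartNameDemo {
  public static void main(String[] args) throws MetaException {
    List<FieldSchema> partCols = Arrays.asList(
        new FieldSchema("ds", "string", null),
        new FieldSchema("hr", "string", null));
    List<String> partVals = Arrays.asList("2024-01-01", "12");
    // Prints: ds=2024-01-01/hr=12
    System.out.println(Warehouse.makePartName(partCols, partVals));
  }
}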
Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.
From the class ColStatsProcessor, method constructColumnStatsFromPackedRows:
private List<ColumnStatistics> constructColumnStatsFromPackedRows(Table tbl1) throws HiveException, MetaException, IOException {
  Table tbl = tbl1;
  String partName = null;
  List<String> colName = colStatDesc.getColName();
  List<String> colType = colStatDesc.getColType();
  boolean isTblLevel = colStatDesc.isTblLevel();
  List<ColumnStatistics> stats = new ArrayList<ColumnStatistics>();
  InspectableObject packedRow;
  while ((packedRow = ftOp.getNextRow()) != null) {
    if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
      throw new HiveException("Unexpected object type encountered while unpacking row");
    }
    List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
    StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> list = soi.getStructFieldsDataAsList(packedRow.o);
    List<FieldSchema> partColSchema = tbl.getPartCols();
    // Partition columns are appended at end, we only care about stats column
    int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
    assert list != null;
    for (int i = 0; i < numOfStatCols; i++) {
      StructField structField = fields.get(i);
      String columnName = colName.get(i);
      String columnType = colType.get(i);
      Object values = list.get(i);
      try {
        ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveStruct(columnName, columnType, structField, values);
        statsObjs.add(statObj);
      } catch (Exception e) {
        if (isStatsReliable) {
          throw new HiveException("Statistics collection failed while (hive.stats.reliable)", e);
        } else {
          LOG.debug("Because {} is infinite or NaN, we skip stats.", columnName, e);
        }
      }
    }
    if (!statsObjs.isEmpty()) {
      if (!isTblLevel) {
        List<String> partVals = new ArrayList<String>();
        // Iterate over partition columns to figure out partition name
        for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
          Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i));
          // could be null for default partition
          partVals.add(partVal == null ? this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
        }
        partName = Warehouse.makePartName(partColSchema, partVals);
      }
      ColumnStatisticsDesc statsDesc = buildColumnStatsDesc(tbl, partName, isTblLevel);
      ColumnStatistics colStats = new ColumnStatistics();
      colStats.setStatsDesc(statsDesc);
      colStats.setStatsObj(statsObjs);
      stats.add(colStats);
    }
  }
  ftOp.clearFetchContext();
  return stats;
}
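The isStatsReliable branch above decides whether a per-column translation failure aborts the whole collection or merely skips that column. A hedged sketch of where the flag is typically read from, assuming HiveConf exposes hive.stats.reliable as ConfVars.HIVE_STATS_RELIABLE (as in recent Hive versions):

import org.apache.hadoop.hive.conf.HiveConf;

public class StatsReliableDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // true => a failure while translating any column's stats raises HiveException
    // instead of being logged and skipped.
    boolean isStatsReliable = conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE);
    System.out.println("hive.stats.reliable = " + isStatsReliable);
  }
}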