Example 16 with InspectableObject

Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.

The class TestOperators, method setUp.

@Before
public void setUp() {
    // Build five rows; each row is a struct of three string columns.
    r = new InspectableObject[5];
    ArrayList<String> names = new ArrayList<String>(3);
    names.add("col0");
    names.add("col1");
    names.add("col2");
    ArrayList<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>(3);
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    for (int i = 0; i < 5; i++) {
        ArrayList<String> data = new ArrayList<String>();
        data.add("" + i);
        data.add("" + (i + 1));
        data.add("" + (i + 2));
        try {
            r[i] = new InspectableObject();
            r[i].o = data;
            r[i].oi = ObjectInspectorFactory.getStandardStructObjectInspector(names, objectInspectors);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
    }
}
Also used : InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ArrayList(java.util.ArrayList) Before(org.junit.Before)
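
For orientation, here is a minimal standalone sketch (not from the Hive sources; the class name InspectableObjectSketch and the sample row values are invented for illustration) of what this fixture builds. An InspectableObject simply pairs an opaque row object (the o field) with the ObjectInspector (the oi field) that knows how to decode it:

import java.util.ArrayList;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class InspectableObjectSketch {
    public static void main(String[] args) {
        // Same shape as the fixture above: a struct of three string columns.
        ArrayList<String> names = new ArrayList<>(Arrays.asList("col0", "col1", "col2"));
        ArrayList<ObjectInspector> ois = new ArrayList<>();
        for (int i = 0; i < names.size(); i++) {
            ois.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        }
        StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);
        // Pair the raw row data with the inspector that decodes it.
        InspectableObject row = new InspectableObject(Arrays.asList("0", "1", "2"), soi);
        // Read one field back out of the opaque row object.
        StructField col1 = soi.getStructFieldRef("col1");
        System.out.println("col1 = " + soi.getStructFieldData(row.o, col1)); // prints: col1 = 1
    }
}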

Example 17 with InspectableObject

Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.

The class TestOperators, method testHashAggr.

private void testHashAggr(GroupByOperator op, HiveConf hconf, InspectableObject[] r, int expectOutputSize) throws HiveException {
    // 1. Collect operator to observe the output of the group by operator
    CollectDesc cd = new CollectDesc(expectOutputSize + 10);
    CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, op);
    op.initialize(hconf, new ObjectInspector[] { r[0].oi });
    // 2. Evaluate on rows and check hashAggr flag
    for (int i = 0; i < r.length; i++) {
        op.process(r[i].o, 0);
    }
    op.close(false);
    InspectableObject io = new InspectableObject();
    int output = 0;
    // 3. Print group by results
    do {
        cdop.retrieve(io);
        if (io.o != null) {
            System.out.println("io.o = " + io.o);
            output++;
        }
    } while (io.o != null);
    // 4. Check partial result size
    assertEquals(expectOutputSize, output);
}
Also used : InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) CollectDesc(org.apache.hadoop.hive.ql.plan.CollectDesc)
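
The do/while drain at the end is a recurring pattern with CollectOperator: retrieve(io) overwrites io in place and leaves io.o null once the buffer is empty. A hedged helper sketch of the same loop (drainAndCount is a name invented here, not Hive API):

// Drain a CollectOperator's buffer and count the rows it collected.
// retrieve() sets io.o to null once no rows remain.
private static int drainAndCount(CollectOperator cdop) throws HiveException {
    InspectableObject io = new InspectableObject();
    int rows = 0;
    cdop.retrieve(io);
    while (io.o != null) {
        rows++;
        cdop.retrieve(io);
    }
    return rows;
}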

Example 18 with InspectableObject

Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.

The class TestOperators, method testHashGroupBy.

@Test
public void testHashGroupBy() throws HiveException {
    InspectableObject[] input = constructHashAggrInputData(5, 3);
    System.out.println("---------------Begin to Construct Groupby Desc-------------");
    // 1. Build AggregationDesc
    String aggregate = "MAX";
    ExprNodeDesc inputColumn = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col0", "table", false);
    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
    params.add(inputColumn);
    GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(aggregate, params, null, false, false);
    AggregationDesc agg = new AggregationDesc(aggregate, genericUDAFEvaluator, params, false, GenericUDAFEvaluator.Mode.PARTIAL1);
    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    aggs.add(agg);
    // 2. aggr keys
    ExprNodeDesc key1 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col1", "table", false);
    ExprNodeDesc key2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col2", "table", false);
    ArrayList<ExprNodeDesc> keys = new ArrayList<>();
    keys.add(key1);
    keys.add(key2);
    // 3. outputCols
    // @see org.apache.hadoop.hive.ql.exec.GroupByOperator.forward
    // outputColumnNames, including: group by keys, agg evaluators output cols.
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    for (int i = 0; i < keys.size() + aggs.size(); i++) {
        outputColumnNames.add("_col" + i);
    }
    // 4. build GroupByDesc desc
    GroupByDesc desc = new GroupByDesc();
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    desc.setKeys(keys);
    desc.setMode(GroupByDesc.Mode.HASH);
    desc.setMemoryThreshold(1.0f);
    desc.setGroupByMemoryUsage(1.0f);
    // minReductionHashAggr
    desc.setMinReductionHashAggr(0.5f);
    // 5. Configure hive conf and build group by operator
    HiveConf hconf = new HiveConf();
    HiveConf.setIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL, 1);
    // 6. test hash aggr without grouping sets
    System.out.println("---------------Begin to test hash group by without grouping sets-------------");
    int withoutGroupingSetsExpectSize = 3;
    GroupByOperator op = new GroupByOperator(new CompilationOpContext());
    op.setConf(desc);
    testHashAggr(op, hconf, input, withoutGroupingSetsExpectSize);
    // 7. test hash aggr with grouping sets
    System.out.println("---------------Begin to test hash group by with grouping sets------------");
    int groupingSetsExpectSize = 6;
    desc.setGroupingSetsPresent(true);
    ArrayList<Long> groupingSets = new ArrayList<>();
    // groupingSets
    groupingSets.add(1L);
    groupingSets.add(2L);
    desc.setListGroupingSets(groupingSets);
    // add grouping sets dummy key
    ExprNodeDesc groupingSetDummyKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
    keys.add(groupingSetDummyKey);
    desc.setKeys(keys);
    // groupingSet Position
    desc.setGroupingSetPosition(2);
    op = new GroupByOperator(new CompilationOpContext());
    op.setConf(desc);
    testHashAggr(op, hconf, input, groupingSetsExpectSize);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) Test(org.junit.Test)
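
The two expected sizes follow from the shape of the input. Assuming constructHashAggrInputData(5, 3) yields five rows with three distinct (col1, col2) combinations (consistent with withoutGroupingSetsExpectSize = 3), each of the two grouping sets re-emits every group once, a relationship sketched below:

// Sketch of the arithmetic behind the two assertions above (illustrative only).
static int expectedGroupByOutput(int distinctKeyCombinations, int numGroupingSets) {
    // Plain hash aggregation emits one row per distinct key combination;
    // with grouping sets, every group is emitted once per set.
    return distinctKeyCombinations * numGroupingSets; // here: 3 * 2 = 6
}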

Example 19 with InspectableObject

Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.

The class ColumnStatsTask, method constructColumnStatsFromPackedRows.

private List<ColumnStatistics> constructColumnStatsFromPackedRows(Hive db) throws HiveException, MetaException, IOException {
    String currentDb = SessionState.get().getCurrentDatabase();
    String tableName = work.getColStats().getTableName();
    String partName = null;
    List<String> colName = work.getColStats().getColName();
    List<String> colType = work.getColStats().getColType();
    boolean isTblLevel = work.getColStats().isTblLevel();
    List<ColumnStatistics> stats = new ArrayList<ColumnStatistics>();
    InspectableObject packedRow;
    Table tbl = db.getTable(currentDb, tableName);
    while ((packedRow = ftOp.getNextRow()) != null) {
        if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
            throw new HiveException("Unexpected object type encountered while unpacking row");
        }
        List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
        StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();
        List<Object> list = soi.getStructFieldsDataAsList(packedRow.o);
        List<FieldSchema> partColSchema = tbl.getPartCols();
        // Partition columns are appended at the end; we only care about the stats columns
        int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
        for (int i = 0; i < numOfStatCols; i++) {
            // Get the field objectInspector, fieldName and the field object.
            ObjectInspector foi = fields.get(i).getFieldObjectInspector();
            Object f = (list == null ? null : list.get(i));
            String fieldName = fields.get(i).getFieldName();
            ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
            statsObj.setColName(colName.get(i));
            statsObj.setColType(colType.get(i));
            unpackStructObject(foi, f, fieldName, statsObj);
            statsObjs.add(statsObj);
        }
        if (!isTblLevel) {
            List<String> partVals = new ArrayList<String>();
            // Iterate over partition columns to figure out partition name
            for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
                Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i));
                // could be null for the default partition
                partVals.add(partVal == null ? this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
            }
            partName = Warehouse.makePartName(partColSchema, partVals);
        }
        String[] names = Utilities.getDbTableName(currentDb, tableName);
        ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel);
        ColumnStatistics colStats = new ColumnStatistics();
        colStats.setStatsDesc(statsDesc);
        colStats.setStatsObj(statsObjs);
        stats.add(colStats);
    }
    ftOp.clearFetchContext();
    return stats;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)
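
The unpacking idiom at the top of the while loop generalizes to any struct-typed row. A minimal hedged sketch (dumpStructRow is an illustrative helper, not part of ColumnStatsTask):

// Walk every field of a struct-typed InspectableObject.
// Assumes packedRow.oi is a STRUCT inspector, as the check above enforces.
static void dumpStructRow(InspectableObject packedRow) {
    StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> data = soi.getStructFieldsDataAsList(packedRow.o);
    for (int i = 0; i < fields.size(); i++) {
        Object value = (data == null) ? null : data.get(i);
        System.out.println(fields.get(i).getFieldName() + " = " + value);
    }
}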

Example 20 with InspectableObject

Use of org.apache.hadoop.hive.serde2.objectinspector.InspectableObject in project hive by apache.

The class ColStatsProcessor, method constructColumnStatsFromPackedRows.

private List<ColumnStatistics> constructColumnStatsFromPackedRows(Table tbl1) throws HiveException, MetaException, IOException {
    Table tbl = tbl1;
    String partName = null;
    List<String> colName = colStatDesc.getColName();
    List<String> colType = colStatDesc.getColType();
    boolean isTblLevel = colStatDesc.isTblLevel();
    List<ColumnStatistics> stats = new ArrayList<ColumnStatistics>();
    InspectableObject packedRow;
    while ((packedRow = ftOp.getNextRow()) != null) {
        if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
            throw new HiveException("Unexpected object type encountered while unpacking row");
        }
        List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
        StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();
        List<Object> list = soi.getStructFieldsDataAsList(packedRow.o);
        List<FieldSchema> partColSchema = tbl.getPartCols();
        // Partition columns are appended at the end; we only care about the stats columns
        int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
        assert list != null;
        for (int i = 0; i < numOfStatCols; i++) {
            StructField structField = fields.get(i);
            String columnName = colName.get(i);
            String columnType = colType.get(i);
            Object values = list.get(i);
            try {
                ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveStruct(columnName, columnType, structField, values);
                statsObjs.add(statObj);
            } catch (Exception e) {
                if (isStatsReliable) {
                    throw new HiveException("Statistics collection failed while (hive.stats.reliable)", e);
                } else {
                    LOG.debug("Because {} is infinite or NaN, we skip stats.", columnName, e);
                }
            }
        }
        if (!statsObjs.isEmpty()) {
            if (!isTblLevel) {
                List<String> partVals = new ArrayList<String>();
                // Iterate over partition columns to figure out partition name
                for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
                    Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i));
                    // could be null for the default partition
                    partVals.add(partVal == null ? this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
                }
                partName = Warehouse.makePartName(partColSchema, partVals);
            }
            ColumnStatisticsDesc statsDesc = buildColumnStatsDesc(tbl, partName, isTblLevel);
            ColumnStatistics colStats = new ColumnStatistics();
            colStats.setStatsDesc(statsDesc);
            colStats.setStatsObj(statsObjs);
            stats.add(colStats);
        }
    }
    ftOp.clearFetchContext();
    return stats;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) IOException(java.io.IOException) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
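
Warehouse.makePartName is what turns the collected partition values into the familiar path-style partition name. A small hedged sketch (the ds/hr columns and their values are invented for illustration):

// Build a partition name the same way the loop above does.
static String examplePartName() throws MetaException {
    List<FieldSchema> partCols = Arrays.asList(
        new FieldSchema("ds", "string", null),
        new FieldSchema("hr", "string", null));
    List<String> partVals = Arrays.asList("2024-01-01", "07");
    // Returns "ds=2024-01-01/hr=07"; values are escaped when necessary.
    return Warehouse.makePartName(partCols, partVals);
}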

Aggregations

InspectableObject (org.apache.hadoop.hive.serde2.objectinspector.InspectableObject): 19 usages
ArrayList (java.util.ArrayList): 9 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 7 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 5 usages
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 4 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 4 usages
CollectDesc (org.apache.hadoop.hive.ql.plan.CollectDesc): 4 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 4 usages
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 4 usages
JobConf (org.apache.hadoop.mapred.JobConf): 4 usages
Test (org.junit.Test): 4 usages
IOException (java.io.IOException): 3 usages
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 3 usages
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 3 usages
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 3 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 3 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 3 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 3 usages
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 3 usages
Text (org.apache.hadoop.io.Text): 3 usages