Search in sources :

Example 41 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class FileSinkOperator method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    try {
        this.hconf = hconf;
        filesCreated = false;
        isNativeTable = !conf.getTableInfo().isNonNative();
        isTemporary = conf.isTemporary();
        multiFileSpray = conf.isMultiFileSpray();
        totalFiles = conf.getTotalFiles();
        numFiles = conf.getNumFiles();
        dpCtx = conf.getDynPartCtx();
        lbCtx = conf.getLbCtx();
        fsp = prevFsp = null;
        valToPaths = new HashMap<String, FSPaths>();
        taskId = Utilities.getTaskId(hconf);
        initializeSpecPath();
        fs = specPath.getFileSystem(hconf);
        try {
            createHiveOutputFormat(hconf);
        } catch (HiveException ex) {
            logOutputFormatError(hconf, ex);
            throw ex;
        }
        isCompressed = conf.getCompressed();
        parent = Utilities.toTempPath(conf.getDirName());
        statsFromRecordWriter = new boolean[numFiles];
        serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
        serializer.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties());
        outputClass = serializer.getSerializedClass();
        if (isLogInfoEnabled) {
            LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat + (isCompressed ? " with compression" : ""));
        }
        // Timeout is chosen to make sure that even if one iteration takes more than
        // half of the script.timeout but less than script.timeout, we will still
        // be able to report progress.
        timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
        if (hconf instanceof JobConf) {
            jc = (JobConf) hconf;
        } else {
            // test code path
            jc = new JobConf(hconf);
        }
        if (multiFileSpray) {
            partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
            int i = 0;
            for (ExprNodeDesc e : conf.getPartitionCols()) {
                partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
            }
            partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
            prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(jc.getPartitionerClass(), null);
        }
        if (dpCtx != null) {
            dpSetup();
        }
        if (lbCtx != null) {
            lbSetup();
        }
        if (!bDynParts) {
            fsp = new FSPaths(specPath);
            // createBucketFiles(fsp);
            if (!this.isSkewedStoredAsSubDirectories) {
                // special entry for non-DP case
                valToPaths.put("", fsp);
            }
        }
        final StoragePolicyValue tmpStorage = StoragePolicyValue.lookup(HiveConf.getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE));
        if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) {
            final Path outputPath = fsp.taskOutputTempPath;
            StoragePolicyShim shim = ShimLoader.getHadoopShims().getStoragePolicyShim(fs);
            if (shim != null) {
                // directory creation is otherwise within the writers
                fs.mkdirs(outputPath);
                shim.setStoragePolicy(outputPath, tmpStorage);
            }
        }
        if (conf.getWriteType() == AcidUtils.Operation.UPDATE || conf.getWriteType() == AcidUtils.Operation.DELETE) {
            // ROW__ID is always in the first field
            recIdField = ((StructObjectInspector) outputObjInspector).getAllStructFieldRefs().get(0);
            recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
            // bucket is the second field in the record id
            bucketField = recIdInspector.getAllStructFieldRefs().get(1);
            bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
        }
        numRows = 0;
        cntr = 1;
        logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
        statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
    } catch (HiveException e) {
        throw e;
    } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StoragePolicyValue(org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue) HiveFatalException(org.apache.hadoop.hive.ql.metadata.HiveFatalException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) JobConf(org.apache.hadoop.mapred.JobConf) StoragePolicyShim(org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyShim) SubStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 42 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class GroupByOperator method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    numRowsInput = 0;
    numRowsHashTbl = 0;
    heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
    countAfterReport = 0;
    groupingSetsPresent = conf.isGroupingSetsPresent();
    ObjectInspector rowInspector = inputObjInspectors[0];
    // init keyFields
    int numKeys = conf.getKeys().size();
    keyFields = new ExprNodeEvaluator[numKeys];
    keyObjectInspectors = new ObjectInspector[numKeys];
    currentKeyObjectInspectors = new ObjectInspector[numKeys];
    for (int i = 0; i < numKeys; i++) {
        keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
        keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
        currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
    }
    // each row
    if (groupingSetsPresent) {
        groupingSets = conf.getListGroupingSets();
        groupingSetsPosition = conf.getGroupingSetPosition();
        newKeysGroupingSets = new IntWritable[groupingSets.size()];
        groupingSetsBitSet = new FastBitSet[groupingSets.size()];
        int pos = 0;
        for (Integer groupingSet : groupingSets) {
            // Create the mapping corresponding to the grouping set
            newKeysGroupingSets[pos] = new IntWritable(groupingSet);
            groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
            pos++;
        }
    }
    // initialize unionExpr for reduce-side
    // reduce KEY has union field as the last field if there are distinct
    // aggregates in group-by.
    List<? extends StructField> sfs = ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
    if (sfs.size() > 0) {
        StructField keyField = sfs.get(0);
        if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
            ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
            if (keyObjInspector instanceof StructObjectInspector) {
                List<? extends StructField> keysfs = ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
                if (keysfs.size() > 0) {
                    // the last field is the union field, if any
                    StructField sf = keysfs.get(keysfs.size() - 1);
                    if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
                        unionExprEval = ExprNodeEvaluatorFactory.get(new ExprNodeColumnDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), keyField.getFieldName() + "." + sf.getFieldName(), null, false), hconf);
                        unionExprEval.initialize(rowInspector);
                    }
                }
            }
        }
    }
    // init aggregationParameterFields
    ArrayList<AggregationDesc> aggrs = conf.getAggregators();
    aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
    aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
    aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
    aggregationParameterObjects = new Object[aggrs.size()][];
    aggregationIsDistinct = new boolean[aggrs.size()];
    for (int i = 0; i < aggrs.size(); i++) {
        AggregationDesc aggr = aggrs.get(i);
        ArrayList<ExprNodeDesc> parameters = aggr.getParameters();
        aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
        aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
        aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
        aggregationParameterObjects[i] = new Object[parameters.size()];
        for (int j = 0; j < parameters.size(); j++) {
            aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
            aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j].initialize(rowInspector);
            if (unionExprEval != null) {
                String[] names = parameters.get(j).getExprString().split("\\.");
                // parameters of the form : KEY.colx:t.coly
                if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
                    String name = names[names.length - 2];
                    int tag = Integer.parseInt(name.split("\\:")[1]);
                    if (aggr.getDistinct()) {
                        // is distinct
                        Set<Integer> set = distinctKeyAggrs.get(tag);
                        if (null == set) {
                            set = new HashSet<Integer>();
                            distinctKeyAggrs.put(tag, set);
                        }
                        if (!set.contains(i)) {
                            set.add(i);
                        }
                    } else {
                        Set<Integer> set = nonDistinctKeyAggrs.get(tag);
                        if (null == set) {
                            set = new HashSet<Integer>();
                            nonDistinctKeyAggrs.put(tag, set);
                        }
                        if (!set.contains(i)) {
                            set.add(i);
                        }
                    }
                } else {
                    // will be KEY._COLx or VALUE._COLx
                    if (!nonDistinctAggrs.contains(i)) {
                        nonDistinctAggrs.add(i);
                    }
                }
            } else {
                if (aggr.getDistinct()) {
                    aggregationIsDistinct[i] = true;
                }
            }
            aggregationParameterStandardObjectInspectors[i][j] = ObjectInspectorUtils.getStandardObjectInspector(aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
            aggregationParameterObjects[i][j] = null;
        }
        if (parameters.size() == 0) {
            // for ex: count(*)
            if (!nonDistinctAggrs.contains(i)) {
                nonDistinctAggrs.add(i);
            }
        }
    }
    // init aggregationClasses
    aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
    for (int i = 0; i < aggregationEvaluators.length; i++) {
        AggregationDesc agg = conf.getAggregators().get(i);
        aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
    }
    MapredContext context = MapredContext.get();
    if (context != null) {
        for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
            context.setup(genericUDAFEvaluator);
        }
    }
    // grouping id should be pruned, which is the last of key columns
    // see ColumnPrunerGroupByProc
    outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;
    // init objectInspectors
    ObjectInspector[] objectInspectors = new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
    for (int i = 0; i < outputKeyLength; i++) {
        objectInspectors[i] = currentKeyObjectInspectors[i];
    }
    for (int i = 0; i < aggregationEvaluators.length; i++) {
        objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
    }
    aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
    if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && (!groupingSetsPresent)) {
        aggregations = newAggregations();
        hashAggr = false;
    } else {
        hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
        aggregations = newAggregations();
        hashAggr = true;
        keyPositionsSize = new ArrayList<Integer>();
        aggrPositions = new List[aggregations.length];
        groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
        // compare every groupbyMapAggrInterval rows
        numRowsCompareHashAggr = groupbyMapAggrInterval;
        minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    }
    List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
    outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, Arrays.asList(objectInspectors));
    KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
    newKeys = keyWrapperFactory.getKeyWrapper();
    isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
    isLlap = isTez && HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap");
    numExecutors = isLlap ? HiveConf.getIntVar(hconf, HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS) : 1;
    firstRow = true;
    // is not known, estimate that based on the number of entries
    if (hashAggr) {
        computeMaxEntriesHashAggr();
    }
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
    memoryThreshold = this.getConf().getMemoryThreshold();
    LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}", isTez, isLlap, numExecutors, maxMemory);
}
Also used : GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IntWritable(org.apache.hadoop.io.IntWritable) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) LazyBinaryObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 43 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class GroupByOperator method genColLists.

// Group by contains the columns needed - no need to aggregate from children
public List<String> genColLists(HashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx) {
    List<String> colLists = new ArrayList<String>();
    ArrayList<ExprNodeDesc> keys = conf.getKeys();
    for (ExprNodeDesc key : keys) {
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());
    }
    ArrayList<AggregationDesc> aggrs = conf.getAggregators();
    for (AggregationDesc aggr : aggrs) {
        ArrayList<ExprNodeDesc> params = aggr.getParameters();
        for (ExprNodeDesc param : params) {
            colLists = Utilities.mergeUniqElems(colLists, param.getCols());
        }
    }
    return colLists;
}
Also used : ArrayList(java.util.ArrayList) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 44 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class SparkDynamicPartitionPruner method initialize.

public void initialize(MapWork work, JobConf jobConf) throws SerDeException {
    Map<String, SourceInfo> columnMap = new HashMap<String, SourceInfo>();
    Set<String> sourceWorkIds = work.getEventSourceTableDescMap().keySet();
    for (String id : sourceWorkIds) {
        List<TableDesc> tables = work.getEventSourceTableDescMap().get(id);
        List<String> columnNames = work.getEventSourceColumnNameMap().get(id);
        List<ExprNodeDesc> partKeyExprs = work.getEventSourcePartKeyExprMap().get(id);
        Iterator<String> cit = columnNames.iterator();
        Iterator<ExprNodeDesc> pit = partKeyExprs.iterator();
        for (TableDesc t : tables) {
            String columnName = cit.next();
            ExprNodeDesc partKeyExpr = pit.next();
            SourceInfo si = new SourceInfo(t, partKeyExpr, columnName, jobConf);
            if (!sourceInfoMap.containsKey(id)) {
                sourceInfoMap.put(id, new ArrayList<SourceInfo>());
            }
            sourceInfoMap.get(id).add(si);
            // the union of the values in that case.
            if (columnMap.containsKey(columnName)) {
                si.values = columnMap.get(columnName).values;
            }
            columnMap.put(columnName, si);
        }
    }
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 45 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorizationContext method getCastToLongExpression.

private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr, PrimitiveCategory integerPrimitiveCategory) throws HiveException {
    ExprNodeDesc child = childExpr.get(0);
    String inputType = childExpr.get(0).getTypeString();
    if (child instanceof ExprNodeConstantDesc) {
        // Return a constant vector expression
        Object constantValue = ((ExprNodeConstantDesc) child).getValue();
        Long longValue = castConstantToLong(constantValue, child.getTypeInfo(), integerPrimitiveCategory);
        return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, VectorExpressionDescriptor.Mode.PROJECTION);
    }
    // special handling.
    if (isIntFamily(inputType)) {
        // integer and boolean types require no conversion, so use a no-op
        return getIdentityExpression(childExpr);
    }
    return null;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) VectorUDAFAvgLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong) VectorUDAFStdSampLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampLong) VectorUDAFVarSampLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong) VectorUDAFMaxLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong) VectorUDAFMinLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong) VectorUDAFStdPopLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLong) VectorUDAFSumLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong) VectorUDAFVarPopLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorUDAFMaxString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString) VectorUDAFMinString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)321 ArrayList (java.util.ArrayList)179 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)146 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)110 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)101 Test (org.junit.Test)74 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)69 HashMap (java.util.HashMap)67 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)57 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)47 LinkedHashMap (java.util.LinkedHashMap)43 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)42 List (java.util.List)40 Operator (org.apache.hadoop.hive.ql.exec.Operator)39 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)35 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)34 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)34 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)34 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)33 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)32