
Example 11 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class MapJoinKey method serializeVector.

/**
 * Serializes row to output for vectorized path.
 * @param byteStream Output to reuse. Can be null, in which case a new one will be created.
 */
public static Output serializeVector(Output byteStream, VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers, byte[] notNullMarkers) throws HiveException, SerDeException {
    Object[] fieldData = new Object[keyOutputWriters.length];
    List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>();
    for (int i = 0; i < keyOutputWriters.length; ++i) {
        VectorExpressionWriter writer = keyOutputWriters[i];
        fieldOis.add(writer.getObjectInspector());
        // This is rather convoluted... to simplify for perf, we could call getRawKeyValue
        // instead of writable, and serialize based on Java type as opposed to OI.
        fieldData[i] = keyWrapperBatch.getWritableKeyValue(kw, i, writer);
        if (nulls != null) {
            nulls[i] = (fieldData[i] == null);
        }
    }
    return serializeRow(byteStream, fieldData, fieldOis, sortableSortOrders, nullMarkers, notNullMarkers);
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)
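
A minimal usage sketch of the buffer-reuse contract documented above: the Output returned by one call is passed back into the next call, so a single backing byte array is reused across rows. The surrounding names (keyWrappers, keyOutputWriters, keyWrapperBatch) are placeholders for state an existing vectorized map-join operator would already hold, and the snapshot step assumes the buffer is rewritten on each call.

Output reused = null;
for (VectorHashKeyWrapper kw : keyWrappers) {
    // Pass the previous Output back in; serializeVector only allocates when it receives null.
    reused = MapJoinKey.serializeVector(reused, kw, keyOutputWriters, keyWrapperBatch,
            null /* nulls */, null /* sortableSortOrders */, null /* nullMarkers */, null /* notNullMarkers */);
    // Copy the bytes out before the next iteration reuses the buffer.
    byte[] keyBytes = Arrays.copyOf(reused.getData(), reused.getLength());
    // ... hand keyBytes to the hash table ...
}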

Example 12 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class TestVectorMapJoinFastRowHashMap method addAndVerifyRows.

private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows, VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType, VerifyFastRowHashMap verifyTable, String[] keyTypeNames, boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
    final int keyCount = keyTypeNames.length;
    PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
    PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
    ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList = new ArrayList<ObjectInspector>(keyCount);
    for (int i = 0; i < keyCount; i++) {
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
        keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        keyPrimitiveCategories[i] = primitiveCategory;
        keyPrimitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
    }
    boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
    Arrays.fill(keyColumnSortOrderIsDesc, false);
    byte[] keyColumnNullMarker = new byte[keyCount];
    Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
    byte[] keyColumnNotNullMarker = new byte[keyCount];
    Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
    BinarySortableSerializeWrite keySerializeWrite = new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc, keyColumnNullMarker, keyColumnNotNullMarker);
    TypeInfo[] valueTypeInfos = valueSource.typeInfos();
    final int columnCount = valueTypeInfos.length;
    SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
    final int count = rows.length;
    for (int i = 0; i < count; i++) {
        Object[] valueRow = rows[i];
        Output valueOutput = new Output();
        ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
        for (int index = 0; index < columnCount; index++) {
            VerifyFastRow.serializeWrite(valueSerializeWrite, valueTypeInfos[index], valueRow[index]);
        }
        byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
        // Add a new key or add a value to an existing key?
        byte[] key;
        if (random.nextBoolean() || verifyTable.getCount() == 0) {
            Object[] keyRow = VectorRandomRowSource.randomWritablePrimitiveRow(keyCount, random, keyPrimitiveTypeInfos);
            Output keyOutput = new Output();
            keySerializeWrite.set(keyOutput);
            for (int index = 0; index < keyCount; index++) {
                VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], keyRow[index]);
            }
            key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
            verifyTable.add(key, keyRow, value, valueRow);
        } else {
            key = verifyTable.addRandomExisting(value, valueRow, random);
        }
        // Wrap the serialized key and value in writables and add them to the hash map.
        BytesWritable keyWritable = new BytesWritable(key);
        BytesWritable valueWritable = new BytesWritable(value);
        map.putRow(keyWritable, valueWritable);
    // verifyTable.verify(map);
    }
    verifyTable.verify(map, hashTableKeyType, valueTypeInfos, doClipping, useExactBytes, random);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite)
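
The test above exercises the SerializeWrite pattern twice: BinarySortableSerializeWrite for keys (with per-column sort order and null markers) and LazyBinarySerializeWrite for values. Below is a stripped-down sketch of the value-side pattern with two hypothetical columns and made-up values; writeLong and writeDouble are the SerializeWrite field writers for bigint and double columns.

Output valueOutput = new Output();
LazyBinarySerializeWrite valueWrite = new LazyBinarySerializeWrite(2);  // two value columns
valueWrite.set(valueOutput);       // bind (and reset) the reusable buffer for a new row
valueWrite.writeLong(42L);         // column 0, illustrative value
valueWrite.writeDouble(3.14);      // column 1, illustrative value
byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());  // snapshot before reuse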

Example 13 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project SQLWindowing by hbutani.

the class MRUtils method initialize.

/**
 * Construct the data structures containing ExprNodeDesc for partition
 * columns and order columns. Use the input definition to construct the list
 * of output columns for the ReduceSinkOperator.
 *
 * @throws WindowingException
 */
public void initialize() throws WindowingException {
    TableFuncDef tabDef = RuntimeUtils.getFirstTableFunction(qdef);
    hiveTableDef = tabDef.getHiveTableDef();
    InputInfo inputInfo;
    ArrayList<ColumnDef> partColList = tabDef.getWindow().getPartDef().getColumns();
    TableFunctionEvaluator tEval = tabDef.getFunction();
    /*
		 * If the query has a map phase, the inputInfo is retrieved from the map
		 * output info of the table function definition. This is constructed
		 * using the map output oi of the table function definition. If the
		 * query does not have a map phase, the inputInfo is retrieved from the
		 * QueryInputDef (either HiveTableDef or HiveQueryDef) of the query.
		 */
    if (tEval.isTransformsRawInput()) {
        inputInfo = qdef.getTranslationInfo().getMapInputInfo(tabDef);
    } else {
        inputInfo = qdef.getTranslationInfo().getInputInfo(hiveTableDef);
    }
    for (ColumnDef colDef : partColList) {
        partCols.add(colDef.getExprNode());
    }
    ArrayList<OrderColumnDef> orderColList = tabDef.getWindow().getOrderDef().getColumns();
    for (OrderColumnDef colDef : orderColList) {
        Order order = colDef.getOrder();
        if (order.name().equals("ASC")) {
            orderString.append('+');
        } else {
            orderString.append('-');
        }
        orderCols.add(colDef.getExprNode());
        outputColumnNames.add(colDef.getAlias());
    }
    RowResolver rr = inputInfo.getRowResolver();
    ArrayList<ColumnInfo> colInfoList = rr.getColumnInfos();
    for (ColumnInfo colInfo : colInfoList) {
        String internalName = colInfo.getInternalName();
        TypeInfo type = colInfo.getType();
        valueCols.add(TranslateUtils.getExprDesc(internalName, type));
        outputColumnNames.add(internalName);
    }
}
Also used : Order(com.sap.hadoop.metadata.Order) OrderColumnDef(com.sap.hadoop.windowing.query2.definition.OrderColumnDef) ColumnDef(com.sap.hadoop.windowing.query2.definition.ColumnDef) OrderColumnDef(com.sap.hadoop.windowing.query2.definition.OrderColumnDef) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TableFuncDef(com.sap.hadoop.windowing.query2.definition.TableFuncDef) InputInfo(com.sap.hadoop.windowing.query2.translate.QueryTranslationInfo.InputInfo) TableFunctionEvaluator(com.sap.hadoop.windowing.functions2.TableFunctionEvaluator)
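
A small worked illustration of the order-string handling in the loop above, with hypothetical columns: for a window specification like PARTITION BY deptno ORDER BY sal ASC, hiredate DESC, the loop appends one sign character per order column, and the resulting string records the per-column sort directions used when the reduce sink is built.

StringBuilder orderString = new StringBuilder();
orderString.append('+');   // sal is ASC
orderString.append('-');   // hiredate is DESC
assert orderString.toString().equals("+-");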

Example 14 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class DDLSemanticAnalyzer method getFullPartitionSpecs.

/**
 * Get the partition specs from the tree. This stores the full specification,
 * including the comparison operator, in the result map.
 *
 * @param ast Tree to extract partitions from.
 * @param tab Table.
 * @return    Map of partitions by prefix length. Most of the time prefix length will
 *            be the same for all partition specs, so we can just OR the expressions.
 */
private Map<Integer, List<ExprNodeGenericFuncDesc>> getFullPartitionSpecs(CommonTree ast, Table tab, boolean canGroupExprs) throws SemanticException {
    String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    Map<String, String> colTypes = new HashMap<String, String>();
    for (FieldSchema fs : tab.getPartitionKeys()) {
        colTypes.put(fs.getName().toLowerCase(), fs.getType());
    }
    Map<Integer, List<ExprNodeGenericFuncDesc>> result = new HashMap<Integer, List<ExprNodeGenericFuncDesc>>();
    for (int childIndex = 0; childIndex < ast.getChildCount(); childIndex++) {
        Tree partSpecTree = ast.getChild(childIndex);
        if (partSpecTree.getType() != HiveParser.TOK_PARTSPEC) {
            continue;
        }
        ExprNodeGenericFuncDesc expr = null;
        HashSet<String> names = new HashSet<String>(partSpecTree.getChildCount());
        for (int i = 0; i < partSpecTree.getChildCount(); ++i) {
            CommonTree partSpecSingleKey = (CommonTree) partSpecTree.getChild(i);
            assert (partSpecSingleKey.getType() == HiveParser.TOK_PARTVAL);
            String key = stripIdentifierQuotes(partSpecSingleKey.getChild(0).getText()).toLowerCase();
            String operator = partSpecSingleKey.getChild(1).getText();
            ASTNode partValNode = (ASTNode) partSpecSingleKey.getChild(2);
            TypeCheckCtx typeCheckCtx = new TypeCheckCtx(null);
            ExprNodeConstantDesc valExpr = (ExprNodeConstantDesc) TypeCheckProcFactory.genExprNode(partValNode, typeCheckCtx).get(partValNode);
            Object val = valExpr.getValue();
            boolean isDefaultPartitionName = val.equals(defaultPartitionName);
            String type = colTypes.get(key);
            if (type == null) {
                throw new SemanticException("Column " + key + " not found");
            }
            PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(type);
            // Create the corresponding hive expression to filter on partition columns.
            if (!isDefaultPartitionName) {
                if (!valExpr.getTypeString().equals(type)) {
                    Converter converter = ObjectInspectorConverters.getConverter(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(valExpr.getTypeInfo()), TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(pti));
                    val = converter.convert(valExpr.getValue());
                }
            }
            ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, key, null, true);
            ExprNodeGenericFuncDesc op;
            if (!isDefaultPartitionName) {
                op = makeBinaryPredicate(operator, column, new ExprNodeConstantDesc(pti, val));
            } else {
                GenericUDF originalOp = FunctionRegistry.getFunctionInfo(operator).getGenericUDF();
                String fnName;
                if (FunctionRegistry.isEq(originalOp)) {
                    fnName = "isnull";
                } else if (FunctionRegistry.isNeq(originalOp)) {
                    fnName = "isnotnull";
                } else {
                    throw new SemanticException("Cannot use " + operator + " in a default partition spec; only '=' and '!=' are allowed.");
                }
                op = makeUnaryPredicate(fnName, column);
            }
            // If it's multi-expr filter (e.g. a='5', b='2012-01-02'), AND with previous exprs.
            expr = (expr == null) ? op : makeBinaryPredicate("and", expr, op);
            names.add(key);
        }
        if (expr == null) {
            continue;
        }
        // We got the expr for one full partition spec. Determine the prefix length.
        int prefixLength = calculatePartPrefix(tab, names);
        List<ExprNodeGenericFuncDesc> orExpr = result.get(prefixLength);
        // If no filter exists for this prefix yet, create a new separate one. In most cases there will only be one.
        if (orExpr == null) {
            result.put(prefixLength, Lists.newArrayList(expr));
        } else if (canGroupExprs) {
            orExpr.set(0, makeBinaryPredicate("or", expr, orExpr.get(0)));
        } else {
            orExpr.add(expr);
        }
    }
    return result;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) CommonTree(org.antlr.runtime.tree.CommonTree) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) CommonTree(org.antlr.runtime.tree.CommonTree) Tree(org.antlr.runtime.tree.Tree) Converter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) HashSet(java.util.HashSet)
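
As a hedged, standalone sketch of the expression shape the loop above assembles for PARTITION (ds = '2012-01-02', hr = '12'): the column names and values are hypothetical, and ExprNodeGenericFuncDesc.newInstance is used directly instead of the analyzer's private makeBinaryPredicate helper (newInstance throws UDFArgumentException).

ExprNodeColumnDesc ds = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "ds", null, true);
ExprNodeColumnDesc hr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "hr", null, true);
ExprNodeGenericFuncDesc dsEq = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPEqual(), Lists.<ExprNodeDesc>newArrayList(ds, new ExprNodeConstantDesc("2012-01-02")));
ExprNodeGenericFuncDesc hrEq = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPEqual(), Lists.<ExprNodeDesc>newArrayList(hr, new ExprNodeConstantDesc("12")));
// Columns within one spec are ANDed together; a default-partition value would instead
// produce isnull(col) for '=' or isnotnull(col) for '!='.
ExprNodeGenericFuncDesc fullSpec = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPAnd(), Lists.<ExprNodeDesc>newArrayList(dsEq, hrEq));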

Example 15 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class TaskCompiler method compile.

@SuppressWarnings({ "nls", "unchecked" })
public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks, final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException {
    Context ctx = pCtx.getContext();
    GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
    List<Task<MoveWork>> mvTask = new ArrayList<>();
    List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
    List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
    boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
    int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
    if (pCtx.getFetchTask() != null) {
        if (pCtx.getFetchTask().getTblDesc() == null) {
            return;
        }
        pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
        TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
        // If the result serde is not ThriftJDBCBinarySerDe, then either the ThriftFormatter or the DefaultFetchFormatter should be used.
        if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
            if (SessionState.get().isHiveServerQuery()) {
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
            } else {
                String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
                if (formatterName == null || formatterName.isEmpty()) {
                    conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
                }
            }
        }
        return;
    }
    optimizeOperatorPlan(pCtx, inputs, outputs);
    /*
     * In case of a select, use a fetch task instead of a move task.
     * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
     * a column stats task later.
     */
    if (pCtx.getQueryProperties().isQuery() && !isCStats) {
        if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
            throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
        }
        LoadFileDesc loadFileDesc = loadFileWork.get(0);
        String cols = loadFileDesc.getColumns();
        String colTypes = loadFileDesc.getColumnTypes();
        String resFileFormat;
        TableDesc resultTab = pCtx.getFetchTableDesc();
        if (resultTab == null) {
            resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
            if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && (resFileFormat.equalsIgnoreCase("SequenceFile"))) {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class);
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            } else {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class);
            }
        } else {
            if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB).equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            }
        }
        FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
        boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
        fetch.setHiveServerQuery(isHiveServerQuery);
        fetch.setSource(pCtx.getFetchSource());
        fetch.setSink(pCtx.getFetchSink());
        if (isHiveServerQuery && null != resultTab && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
            fetch.setIsUsingThriftJDBCBinarySerDe(true);
        } else {
            fetch.setIsUsingThriftJDBCBinarySerDe(false);
        }
        pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch));
        // For the FetchTask, the limit optimization requires we fetch all the rows
        // in memory and count how many rows we get. It's not practical if the
        // limit factor is too big
        int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
        if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
            LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization.");
            globalLimitCtx.disableOpt();
        }
        if (outerQueryLimit == 0) {
            // Believe it or not, some tools do generate queries with LIMIT 0 and then expect
            // the query to run quickly. Let's meet their requirement.
            LOG.info("Limit 0. No query execution needed.");
            return;
        }
    } else if (!isCStats) {
        for (LoadTableDesc ltd : loadTableWork) {
            Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
            mvTask.add(tsk);
        }
        boolean oneLoadFileForCtas = true;
        for (LoadFileDesc lfd : loadFileWork) {
            if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
                if (!oneLoadFileForCtas) {
                    // should not have more than 1 load file for CTAS.
                    throw new SemanticException("One query is not expected to contain multiple CTAS loads statements");
                }
                setLoadFileLocation(pCtx, lfd);
                oneLoadFileForCtas = false;
            }
            mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false)));
        }
    }
    generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);
    // For each task, set the key descriptor for the reducer
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
    }
    // If a task contains an operator which requires a specific input format to be used, set it now.
    for (Task<? extends Serializable> rootTask : rootTasks) {
        setInputFormat(rootTask);
    }
    optimizeTaskPlan(rootTasks, pCtx, ctx);
    /*
     * If the query was the result of analyze table column compute statistics rewrite, create
     * a column stats task instead of a fetch task to persist stats to the metastore.
     * As per HIVE-15903, we will also collect table stats when user computes column stats.
     * That means, if isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()
     * We need to collect table stats
     * if isCStats, we need to include a basic stats task
     * else it is ColumnStatsAutoGather, which should have a move task with a stats task already.
     */
    if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
        // map from tablename to task (ColumnStatsTask which includes a BasicStatsTask)
        Map<String, StatsTask> map = new LinkedHashMap<>();
        if (isCStats) {
            if (rootTasks == null || rootTasks.size() != 1 || pCtx.getTopOps() == null || pCtx.getTopOps().size() != 1) {
                throw new SemanticException("Can not find correct root task!");
            }
            try {
                Task<? extends Serializable> root = rootTasks.iterator().next();
                StatsTask tsk = (StatsTask) genTableStats(pCtx, pCtx.getTopOps().values().iterator().next(), root, outputs);
                root.addDependentTask(tsk);
                map.put(extractTableFullName(tsk), tsk);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, map, outerQueryLimit, 0);
        } else {
            Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
            getLeafTasks(rootTasks, leafTasks);
            List<Task<? extends Serializable>> nonStatsLeafTasks = new ArrayList<>();
            for (Task<? extends Serializable> tsk : leafTasks) {
                // map table name to the correct ColumnStatsTask
                if (tsk instanceof StatsTask) {
                    map.put(extractTableFullName((StatsTask) tsk), (StatsTask) tsk);
                } else {
                    nonStatsLeafTasks.add(tsk);
                }
            }
            // add cStatsTask as a dependent of all the nonStatsLeafTasks
            for (Task<? extends Serializable> tsk : nonStatsLeafTasks) {
                for (Task<? extends Serializable> cStatsTask : map.values()) {
                    tsk.addDependentTask(cStatsTask);
                }
            }
            for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx.getColumnStatsAutoGatherContexts()) {
                if (!columnStatsAutoGatherContext.isInsertInto()) {
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, 0);
                } else {
                    int numBitVector;
                    try {
                        numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
                    } catch (Exception e) {
                        throw new SemanticException(e.getMessage());
                    }
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, numBitVector);
                }
            }
        }
    }
    decideExecMode(rootTasks, ctx, globalLimitCtx);
    if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateTableDesc crtTblDesc = pCtx.getCreateTable();
        crtTblDesc.validate(conf);
        Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc));
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask);
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateViewDesc viewDesc = pCtx.getCreateViewDesc();
        Task<? extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc));
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask);
    } else if (pCtx.getMaterializedViewUpdateDesc() != null) {
        // If there is a materialized view update desc, we introduce it at the end
        // of the tree.
        MaterializedViewDesc materializedViewDesc = pCtx.getMaterializedViewUpdateDesc();
        Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
        getLeafTasks(rootTasks, leafTasks);
        Task<? extends Serializable> materializedViewTask = TaskFactory.get(materializedViewDesc, conf);
        for (Task<? extends Serializable> task : leafTasks) {
            task.addDependentTask(materializedViewTask);
        }
    }
    if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
        LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
        pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }
    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
        LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
        globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
    }
    Interner<TableDesc> interner = Interners.newStrongInterner();
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.internTableDesc(rootTask, interner);
        GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
    }
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LinkedHashSet(java.util.LinkedHashSet) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) DDLTask(org.apache.hadoop.hive.ql.exec.DDLTask) Task(org.apache.hadoop.hive.ql.exec.Task) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) Serializable(java.io.Serializable) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MaterializedViewDesc(org.apache.hadoop.hive.ql.exec.MaterializedViewDesc) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) ThriftFormatter(org.apache.hadoop.hive.serde2.thrift.ThriftFormatter) CreateViewDesc(org.apache.hadoop.hive.ql.plan.CreateViewDesc) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) Context(org.apache.hadoop.hive.ql.Context) AnalyzeRewriteContext(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) DefaultFetchFormatter(org.apache.hadoop.hive.serde2.DefaultFetchFormatter) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc)

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 77
ArrayList (java.util.ArrayList) 72
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 48
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 48
Test (org.junit.Test) 44
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) 43
Output (org.apache.hadoop.hive.serde2.ByteStream.Output) 42
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) 41
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 41
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 37
Text (org.apache.hadoop.io.Text) 35
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 32
IOException (java.io.IOException) 29
DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2) 24
BytesWritable (org.apache.hadoop.io.BytesWritable) 23
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) 22
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 22
SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 21
TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2) 21
List (java.util.List) 18