
Example 61 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class ColumnPrunerProcFactory, the method pruneReduceSinkOperator:

private static void pruneReduceSinkOperator(boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowSchema oldRS = reduce.getSchema();
    ArrayList<ColumnInfo> old_signature = oldRS.getSignature();
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);
    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();
    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < retainFlags.length; i++) {
        String outputCol = valueColNames.get(i);
        ExprNodeDesc outputColExpr = valueExprs.get(i);
        if (!retainFlags[i]) {
            ColumnInfo colInfo = oldRS.getColumnInfo(outputCol);
            if (colInfo == null) {
                outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
                colInfo = oldRS.getColumnInfo(outputCol);
            }
            // if the column still cannot be resolved, its ColumnInfo was already removed from the row resolver, so skip it
            if (colInfo == null) {
                continue;
            }
            // only prune the column if it is not a key, i.e. it does not appear in keyExprs of the RS
            if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
                oldMap.remove(outputCol);
                signature.remove(colInfo);
            }
        } else {
            newValueColNames.add(outputCol);
            newValueExprs.add(outputColExpr);
        }
    }
    oldRS.setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(reduceConf.getValueCols(), newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), ArrayList(java.util.ArrayList), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
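
The essence of this method is the retainFlags loop: the flags array is parallel to the value column and value expression lists, and only flagged entries survive into the rebuilt lists that feed PlanUtils.getReduceValueTableDesc. A minimal, Hive-independent sketch of that pattern (all names and values below are illustrative, not Hive API):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class RetainFlagsSketch {
    public static void main(String[] args) {
        // parallel lists, mirroring valueColNames / valueExprs in the method above
        boolean[] retainFlags = { true, false, true };
        List<String> valueColNames = Arrays.asList("_col0", "_col1", "_col2");
        List<String> valueExprs = Arrays.asList("expr0", "expr1", "expr2");
        List<String> newValueColNames = new ArrayList<>();
        List<String> newValueExprs = new ArrayList<>();
        for (int i = 0; i < retainFlags.length; i++) {
            if (retainFlags[i]) {
                // retained columns are carried over into the new value lists
                newValueColNames.add(valueColNames.get(i));
                newValueExprs.add(valueExprs.get(i));
            }
        }
        System.out.println(newValueColNames);   // [_col0, _col2]
        System.out.println(newValueExprs);      // [expr0, expr2]
    }
}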

Example 62 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class TestSymlinkTextInputFormat, the method setUp:

@Override
protected void setUp() throws IOException {
    conf = new Configuration();
    job = new JobConf(conf);
    TableDesc tblDesc = Utilities.defaultTd;
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
    LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
    pt.put(new Path("/tmp/testfolder"), partDesc);
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToPartitionInfo(pt);
    Utilities.setMapRedWork(job, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
    fileSystem = FileSystem.getLocal(conf);
    testDir = new Path(System.getProperty("test.tmp.dir", System.getProperty("user.dir", new File(".").getAbsolutePath())) + "/TestSymlinkTextInputFormat");
    reporter = Reporter.NULL;
    fileSystem.delete(testDir, true);
    dataDir1 = new Path(testDir, "datadir1");
    dataDir2 = new Path(testDir, "datadir2");
    symlinkDir = new Path(testDir, "symlinkdir");
}
Also used : Path(org.apache.hadoop.fs.Path), Configuration(org.apache.hadoop.conf.Configuration), MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork), PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), JobConf(org.apache.hadoop.mapred.JobConf), File(java.io.File), LinkedHashMap(java.util.LinkedHashMap)
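
A small follow-up sketch, not part of the original test: once setUp has serialized the plan with Utilities.setMapRedWork, it can be read back through Utilities.getMapWork, the same call that Example 63 below uses. This assumes it runs inside the same test class, so job and assertTrue are in scope; java.util.Map is the only import beyond those listed above.

    // read the MapWork back out of the JobConf populated by Utilities.setMapRedWork above
    Map<Path, PartitionDesc> restored = Utilities.getMapWork(job).getPathToPartitionInfo();
    // the entry registered in setUp should still be present after deserialization
    assertTrue(restored.containsKey(new Path("/tmp/testfolder")));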

Example 63 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class TestCombineHiveInputFormat, the method testAvoidSplitCombination:

public void testAvoidSplitCombination() throws Exception {
    Configuration conf = new Configuration();
    JobConf job = new JobConf(conf);
    TableDesc tblDesc = Utilities.defaultTd;
    tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
    LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
    pt.put(new Path("/tmp/testfolder1"), partDesc);
    pt.put(new Path("/tmp/testfolder2"), partDesc);
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToPartitionInfo(pt);
    Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
    Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
    try {
        Path[] paths = new Path[2];
        paths[0] = new Path("/tmp/testfolder1");
        paths[1] = new Path("/tmp/testfolder2");
        CombineHiveInputFormat combineInputFormat = ReflectionUtils.newInstance(CombineHiveInputFormat.class, conf);
        combineInputFormat.pathToPartitionInfo = Utilities.getMapWork(conf).getPathToPartitionInfo();
        Set results = combineInputFormat.getNonCombinablePathIndices(job, paths, 2);
        assertEquals("Should have both path indices in the results set", 2, results.size());
    } finally {
        // Cleanup the mapwork path
        FileSystem.get(conf).delete(mapWorkPath, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path), Set(java.util.Set), Configuration(org.apache.hadoop.conf.Configuration), MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork), PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), JobConf(org.apache.hadoop.mapred.JobConf), LinkedHashMap(java.util.LinkedHashMap)
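
TestSkipCombineInputFormat itself is not shown in this excerpt. A hedged sketch of what such a format could look like, assuming the CombineHiveInputFormat.AvoidSplitCombination contract that getNonCombinablePathIndices consults; the class name is illustrative, and it extends org.apache.hadoop.mapred.TextInputFormat only to have a concrete base.

// Illustrative only -- not the actual TestSkipCombineInputFormat used above.
public static class SkipCombineTextInputFormat extends TextInputFormat
        implements CombineHiveInputFormat.AvoidSplitCombination {
    @Override
    public boolean shouldSkipCombine(Path path, Configuration conf) throws IOException {
        // returning true marks every path served by this format as non-combinable,
        // which is why both path indices show up in the results set of the test
        return true;
    }
}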

Example 64 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class TestHiveBinarySearchRecordReader, the method init:

private void init() throws IOException {
    conf = new JobConf();
    resetIOContext();
    rcfReader = mock(RCFileRecordReader.class);
    when(rcfReader.next((LongWritable) anyObject(), (BytesRefArrayWritable) anyObject())).thenReturn(true);
    // Since the start is 0, and the length is 100, the first call to sync should be with the value
    // 50 so return that for getPos()
    when(rcfReader.getPos()).thenReturn(50L);
    conf.setBoolean("hive.input.format.sorted", true);
    TableDesc tblDesc = Utilities.defaultTd;
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
    LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
    pt.put(new Path("/tmp/testfolder"), partDesc);
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToPartitionInfo(pt);
    Utilities.setMapRedWork(conf, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
    hiveSplit = new TestHiveInputSplit();
    hbsReader = new TestHiveRecordReader(rcfReader, conf);
    hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}
Also used : Path(org.apache.hadoop.fs.Path), MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork), PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), JobConf(org.apache.hadoop.mapred.JobConf), LinkedHashMap(java.util.LinkedHashMap)
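
The stubbed return value of 50 for getPos() follows from the midpoint arithmetic mentioned in the comment above; a standalone illustration (plain numbers, not Hive code):

public class MidpointSketch {
    public static void main(String[] args) {
        long start = 0L;     // split start, as in the comment above
        long length = 100L;  // split length
        // a binary search over the byte range probes the middle of the range first,
        // so the reader is expected to sync to start + length / 2
        long firstProbe = start + length / 2;
        System.out.println(firstProbe);  // prints 50, matching thenReturn(50L)
    }
}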

Example 65 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From the class ColumnPrunerProcFactory, the method pruneJoinOperator:

private static void pruneJoinOperator(NodeProcessorCtx ctx, CommonJoinOperator op, JoinDesc conf, Map<String, ExprNodeDesc> columnExprMap, Map<Byte, List<Integer>> retainMap, boolean mapJoin) throws SemanticException {
    ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
    List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();
    LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
    if (cppCtx.genColLists(op) == null) {
        return;
    }
    List<FieldNode> neededColList = new ArrayList<>(cppCtx.genColLists(op));
    Map<Byte, List<FieldNode>> prunedColLists = new HashMap<>();
    for (byte tag : conf.getTagOrder()) {
        prunedColLists.put(tag, new ArrayList<FieldNode>());
    }
    // add the columns in join filters
    Set<Map.Entry<Byte, List<ExprNodeDesc>>> filters = conf.getFilters().entrySet();
    Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iter = filters.iterator();
    while (iter.hasNext()) {
        Map.Entry<Byte, List<ExprNodeDesc>> entry = iter.next();
        Byte tag = entry.getKey();
        for (ExprNodeDesc desc : entry.getValue()) {
            List<FieldNode> cols = prunedColLists.get(tag);
            cols = mergeFieldNodesWithDesc(cols, desc);
            prunedColLists.put(tag, cols);
        }
    }
    // add the columns in residual filters
    if (conf.getResidualFilterExprs() != null) {
        for (ExprNodeDesc desc : conf.getResidualFilterExprs()) {
            neededColList = mergeFieldNodesWithDesc(neededColList, desc);
        }
    }
    RowSchema joinRS = op.getSchema();
    ArrayList<String> outputCols = new ArrayList<String>();
    ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < conf.getOutputColumnNames().size(); i++) {
        String internalName = conf.getOutputColumnNames().get(i);
        ExprNodeDesc desc = columnExprMap.get(internalName);
        Byte tag = conf.getReversedExprs().get(internalName);
        if (lookupColumn(neededColList, internalName) == null) {
            int index = conf.getExprs().get(tag).indexOf(desc);
            if (index < 0) {
                continue;
            }
            conf.getExprs().get(tag).remove(desc);
            if (retainMap != null) {
                retainMap.get(tag).remove(index);
            }
        } else {
            List<FieldNode> prunedRSList = prunedColLists.get(tag);
            if (prunedRSList == null) {
                prunedRSList = new ArrayList<>();
                prunedColLists.put(tag, prunedRSList);
            }
            prunedColLists.put(tag, mergeFieldNodesWithDesc(prunedRSList, desc));
            outputCols.add(internalName);
            newColExprMap.put(internalName, desc);
        }
    }
    if (mapJoin) {
        // regenerate the valueTableDesc
        List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
        for (int pos = 0; pos < op.getParentOperators().size(); pos++) {
            List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
            StringBuilder keyOrder = new StringBuilder();
            for (int i = 0; i < valueCols.size(); i++) {
                keyOrder.append("+");
            }
            TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
            valueTableDescs.add(valueTableDesc);
        }
        ((MapJoinDesc) conf).setValueTblDescs(valueTableDescs);
        Set<Map.Entry<Byte, List<ExprNodeDesc>>> exprs = ((MapJoinDesc) conf).getKeys().entrySet();
        Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iters = exprs.iterator();
        while (iters.hasNext()) {
            Map.Entry<Byte, List<ExprNodeDesc>> entry = iters.next();
            List<ExprNodeDesc> lists = entry.getValue();
            for (int j = 0; j < lists.size(); j++) {
                ExprNodeDesc desc = lists.get(j);
                Byte tag = entry.getKey();
                List<FieldNode> cols = prunedColLists.get(tag);
                cols = mergeFieldNodesWithDesc(cols, desc);
                prunedColLists.put(tag, cols);
            }
        }
    }
    for (Operator<? extends OperatorDesc> child : childOperators) {
        if (child instanceof ReduceSinkOperator) {
            boolean[] flags = getPruneReduceSinkOpRetainFlags(toColumnNames(neededColList), (ReduceSinkOperator) child);
            pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx);
        }
    }
    for (int i = 0; i < outputCols.size(); i++) {
        String internalName = outputCols.get(i);
        ColumnInfo col = joinRS.getColumnInfo(internalName);
        rs.add(col);
    }
    LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
    op.setColumnExprMap(newColExprMap);
    conf.setOutputColumnNames(outputCols);
    op.getSchema().setSignature(rs);
    cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), PTFOperator(org.apache.hadoop.hive.ql.exec.PTFOperator), GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator), LateralViewForwardOperator(org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator), UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator), UDTFOperator(org.apache.hadoop.hive.ql.exec.UDTFOperator), AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator), FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator), TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator), HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), List(java.util.List), ArrayList(java.util.ArrayList), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc), Map(java.util.Map), HashMap(java.util.HashMap)
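
A Hive-independent sketch of the bookkeeping the join pruner performs on its output columns: a column is kept only if a parent still needs it, otherwise its producing expression is dropped from that tag's expression list, mirroring the conf.getExprs().get(tag).remove(desc) call above. All names, tags, and values below are illustrative:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class JoinPruneSketch {
    public static void main(String[] args) {
        // columns the parent operators actually need (cf. neededColList above)
        Set<String> needed = new HashSet<>(Arrays.asList("_col0", "_col2"));
        // join outputs and, per output, the tag and expression that produce them
        List<String> outputCols = new ArrayList<>(Arrays.asList("_col0", "_col1", "_col2"));
        Map<String, Byte> tagOf = new HashMap<>();
        tagOf.put("_col0", (byte) 0);
        tagOf.put("_col1", (byte) 0);
        tagOf.put("_col2", (byte) 1);
        Map<String, String> exprOf = new HashMap<>();
        exprOf.put("_col0", "a");
        exprOf.put("_col1", "b");
        exprOf.put("_col2", "c");
        Map<Byte, List<String>> exprsByTag = new HashMap<>();
        exprsByTag.put((byte) 0, new ArrayList<>(Arrays.asList("a", "b")));
        exprsByTag.put((byte) 1, new ArrayList<>(Arrays.asList("c")));

        List<String> retained = new ArrayList<>();
        for (String col : outputCols) {
            if (needed.contains(col)) {
                retained.add(col);                                      // column survives pruning
            } else {
                exprsByTag.get(tagOf.get(col)).remove(exprOf.get(col)); // drop its expression
            }
        }
        // retained -> [_col0, _col2]; exprsByTag keeps only "a" for tag 0 and "c" for tag 1
        System.out.println(retained + " " + exprsByTag);
    }
}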

Aggregations

TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 93
ArrayList (java.util.ArrayList): 47
Path (org.apache.hadoop.fs.Path): 34
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 29
HashMap (java.util.HashMap): 26
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 26
LinkedHashMap (java.util.LinkedHashMap): 23
Properties (java.util.Properties): 19
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 19
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 18
Operator (org.apache.hadoop.hive.ql.exec.Operator): 16
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 16
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 16
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 16
JobConf (org.apache.hadoop.mapred.JobConf): 15
List (java.util.List): 14
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 14
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 14
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 11
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 11