Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class ColumnPrunerProcFactory, method pruneReduceSinkOperator.
private static void pruneReduceSinkOperator(boolean[] retainFlags, ReduceSinkOperator reduce,
    ColumnPrunerProcCtx cppCtx) throws SemanticException {
  ReduceSinkDesc reduceConf = reduce.getConf();
  Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
  LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
  RowSchema oldRS = reduce.getSchema();
  ArrayList<ColumnInfo> old_signature = oldRS.getSignature();
  ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);
  List<String> valueColNames = reduceConf.getOutputValueColumnNames();
  ArrayList<String> newValueColNames = new ArrayList<String>();
  List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
  List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
  ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < retainFlags.length; i++) {
    String outputCol = valueColNames.get(i);
    ExprNodeDesc outputColExpr = valueExprs.get(i);
    if (!retainFlags[i]) {
      ColumnInfo colInfo = oldRS.getColumnInfo(outputCol);
      if (colInfo == null) {
        outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
        colInfo = oldRS.getColumnInfo(outputCol);
      }
      // do row resolve once more because the ColumnInfo in row resolver is already removed
      if (colInfo == null) {
        continue;
      }
      // i.e. this column is not appearing in keyExprs of the RS
      if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
        oldMap.remove(outputCol);
        signature.remove(colInfo);
      }
    } else {
      newValueColNames.add(outputCol);
      newValueExprs.add(outputColExpr);
    }
  }
  oldRS.setSignature(signature);
  reduce.getSchema().setSignature(signature);
  reduceConf.setOutputValueColumnNames(newValueColNames);
  reduceConf.setValueCols(newValueExprs);
  TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(
      PlanUtils.getFieldSchemasFromColumnList(reduceConf.getValueCols(), newValueColNames, 0, ""));
  reduceConf.setValueSerializeInfo(newValueTable);
  LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
}
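The TableDesc-specific step in this method is at the end: once the value columns are pruned, the reduce-side value serialization descriptor is rebuilt so it only describes the retained columns. A minimal sketch of that step in isolation, using the same PlanUtils calls; the helper name and its parameters are illustrative, not part of the original class:

// Sketch only: rebuild the value-side TableDesc of a ReduceSinkDesc from already-pruned columns.
// The method name and parameters are hypothetical; the PlanUtils calls mirror the code above.
private static void resetValueSerializeInfo(ReduceSinkDesc reduceConf,
    ArrayList<ExprNodeDesc> prunedValueExprs, ArrayList<String> prunedValueNames) {
  reduceConf.setOutputValueColumnNames(prunedValueNames);
  reduceConf.setValueCols(prunedValueExprs);
  TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(
      PlanUtils.getFieldSchemasFromColumnList(reduceConf.getValueCols(), prunedValueNames, 0, ""));
  reduceConf.setValueSerializeInfo(newValueTable);
}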
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class TestSymlinkTextInputFormat, method setUp.
@Override
protected void setUp() throws IOException {
  conf = new Configuration();
  job = new JobConf(conf);
  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(job, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
  fileSystem = FileSystem.getLocal(conf);
  testDir = new Path(System.getProperty("test.tmp.dir",
      System.getProperty("user.dir", new File(".").getAbsolutePath())) + "/TestSymlinkTextInputFormat");
  reporter = Reporter.NULL;
  fileSystem.delete(testDir, true);
  dataDir1 = new Path(testDir, "datadir1");
  dataDir2 = new Path(testDir, "datadir2");
  symlinkDir = new Path(testDir, "symlinkdir");
}
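The TableDesc wiring above is the fixture pattern the remaining tests on this page also use: take the stock Utilities.defaultTd descriptor, wrap it in a PartitionDesc, register it for an input path in the MapWork, and serialize the plan with Utilities.setMapRedWork. A condensed sketch of just that wiring, with an illustrative path and plan directory:

// Sketch: minimal TableDesc -> PartitionDesc -> MapredWork wiring; path names are illustrative.
JobConf job = new JobConf(new Configuration());
TableDesc tblDesc = Utilities.defaultTd;                    // stock default table descriptor
PartitionDesc partDesc = new PartitionDesc(tblDesc, null);  // null partition spec, as in the tests above
LinkedHashMap<Path, PartitionDesc> pathToPartition = new LinkedHashMap<>();
pathToPartition.put(new Path("/tmp/testfolder"), partDesc);
MapredWork mrwork = new MapredWork();
mrwork.getMapWork().setPathToPartitionInfo(pathToPartition);
Utilities.setMapRedWork(job, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));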
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class TestCombineHiveInputFormat, method testAvoidSplitCombination.
public void testAvoidSplitCombination() throws Exception {
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf);
  TableDesc tblDesc = Utilities.defaultTd;
  tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder1"), partDesc);
  pt.put(new Path("/tmp/testfolder2"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  try {
    Path[] paths = new Path[2];
    paths[0] = new Path("/tmp/testfolder1");
    paths[1] = new Path("/tmp/testfolder2");
    CombineHiveInputFormat combineInputFormat = ReflectionUtils.newInstance(CombineHiveInputFormat.class, conf);
    combineInputFormat.pathToPartitionInfo = Utilities.getMapWork(conf).getPathToPartitionInfo();
    Set results = combineInputFormat.getNonCombinablePathIndices(job, paths, 2);
    assertEquals("Should have both path indices in the results set", 2, results.size());
  } finally {
    // Cleanup the mapwork path
    FileSystem.get(conf).delete(mapWorkPath, true);
  }
}
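The interesting TableDesc usage here is tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class): the input format carried by the descriptor is what makes getNonCombinablePathIndices report both paths. A hedged sketch of what such an input format might look like, assuming it implements the CombineHiveInputFormat.AvoidSplitCombination interface that the non-combinable check consults; the class name below is illustrative, and the real TestSkipCombineInputFormat is defined elsewhere in this test class:

// Sketch only: an input format that asks to be excluded from split combination.
// Assumes CombineHiveInputFormat.AvoidSplitCombination and its shouldSkipCombine hook.
public static class SkipCombineTextInputFormat extends TextInputFormat
    implements CombineHiveInputFormat.AvoidSplitCombination {
  @Override
  public boolean shouldSkipCombine(Path path, Configuration conf) throws IOException {
    return true; // report every path under this format as non-combinable
  }
}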
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class TestHiveBinarySearchRecordReader, method init.
private void init() throws IOException {
  conf = new JobConf();
  resetIOContext();
  rcfReader = mock(RCFileRecordReader.class);
  when(rcfReader.next((LongWritable) anyObject(), (BytesRefArrayWritable) anyObject())).thenReturn(true);
  // Since the start is 0 and the length is 100, the first call to sync should be with the value 50,
  // so return that for getPos()
  when(rcfReader.getPos()).thenReturn(50L);
  conf.setBoolean("hive.input.format.sorted", true);
  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(conf, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
  hiveSplit = new TestHiveInputSplit();
  hbsReader = new TestHiveRecordReader(rcfReader, conf);
  hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}
Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
The class ColumnPrunerProcFactory, method pruneJoinOperator.
private static void pruneJoinOperator(NodeProcessorCtx ctx, CommonJoinOperator op, JoinDesc conf,
    Map<String, ExprNodeDesc> columnExprMap, Map<Byte, List<Integer>> retainMap, boolean mapJoin)
    throws SemanticException {
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();
  LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
  if (cppCtx.genColLists(op) == null) {
    return;
  }
  List<FieldNode> neededColList = new ArrayList<>(cppCtx.genColLists(op));
  Map<Byte, List<FieldNode>> prunedColLists = new HashMap<>();
  for (byte tag : conf.getTagOrder()) {
    prunedColLists.put(tag, new ArrayList<FieldNode>());
  }
  // add the columns in join filters
  Set<Map.Entry<Byte, List<ExprNodeDesc>>> filters = conf.getFilters().entrySet();
  Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iter = filters.iterator();
  while (iter.hasNext()) {
    Map.Entry<Byte, List<ExprNodeDesc>> entry = iter.next();
    Byte tag = entry.getKey();
    for (ExprNodeDesc desc : entry.getValue()) {
      List<FieldNode> cols = prunedColLists.get(tag);
      cols = mergeFieldNodesWithDesc(cols, desc);
      prunedColLists.put(tag, cols);
    }
  }
  // add the columns in residual filters
  if (conf.getResidualFilterExprs() != null) {
    for (ExprNodeDesc desc : conf.getResidualFilterExprs()) {
      neededColList = mergeFieldNodesWithDesc(neededColList, desc);
    }
  }
  RowSchema joinRS = op.getSchema();
  ArrayList<String> outputCols = new ArrayList<String>();
  ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < conf.getOutputColumnNames().size(); i++) {
    String internalName = conf.getOutputColumnNames().get(i);
    ExprNodeDesc desc = columnExprMap.get(internalName);
    Byte tag = conf.getReversedExprs().get(internalName);
    if (lookupColumn(neededColList, internalName) == null) {
      int index = conf.getExprs().get(tag).indexOf(desc);
      if (index < 0) {
        continue;
      }
      conf.getExprs().get(tag).remove(desc);
      if (retainMap != null) {
        retainMap.get(tag).remove(index);
      }
    } else {
      List<FieldNode> prunedRSList = prunedColLists.get(tag);
      if (prunedRSList == null) {
        prunedRSList = new ArrayList<>();
        prunedColLists.put(tag, prunedRSList);
      }
      prunedColLists.put(tag, mergeFieldNodesWithDesc(prunedRSList, desc));
      outputCols.add(internalName);
      newColExprMap.put(internalName, desc);
    }
  }
  if (mapJoin) {
    // regenerate the valueTableDesc
    List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
    for (int pos = 0; pos < op.getParentOperators().size(); pos++) {
      List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
      StringBuilder keyOrder = new StringBuilder();
      for (int i = 0; i < valueCols.size(); i++) {
        keyOrder.append("+");
      }
      TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(
          PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
      valueTableDescs.add(valueTableDesc);
    }
    ((MapJoinDesc) conf).setValueTblDescs(valueTableDescs);
    Set<Map.Entry<Byte, List<ExprNodeDesc>>> exprs = ((MapJoinDesc) conf).getKeys().entrySet();
    Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iters = exprs.iterator();
    while (iters.hasNext()) {
      Map.Entry<Byte, List<ExprNodeDesc>> entry = iters.next();
      List<ExprNodeDesc> lists = entry.getValue();
      for (int j = 0; j < lists.size(); j++) {
        ExprNodeDesc desc = lists.get(j);
        Byte tag = entry.getKey();
        List<FieldNode> cols = prunedColLists.get(tag);
        cols = mergeFieldNodesWithDesc(cols, desc);
        prunedColLists.put(tag, cols);
      }
    }
  }
  for (Operator<? extends OperatorDesc> child : childOperators) {
    if (child instanceof ReduceSinkOperator) {
      boolean[] flags = getPruneReduceSinkOpRetainFlags(toColumnNames(neededColList), (ReduceSinkOperator) child);
      pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx);
    }
  }
  for (int i = 0; i < outputCols.size(); i++) {
    String internalName = outputCols.get(i);
    ColumnInfo col = joinRS.getColumnInfo(internalName);
    rs.add(col);
  }
  LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
  op.setColumnExprMap(newColExprMap);
  conf.setOutputColumnNames(outputCols);
  op.getSchema().setSignature(rs);
  cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
}
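As in the reduce-sink case, the TableDesc work here is the regeneration of the map-join value descriptors after pruning: for each join input, the pruned value expressions are turned into field schemas and a fresh value TableDesc. A minimal sketch of that loop on its own, assuming conf is already a MapJoinDesc and numParents is the number of join inputs:

// Sketch: rebuild one map-join value TableDesc per join input from the pruned expressions.
// 'conf' (a MapJoinDesc) and 'numParents' are assumed to be in scope.
List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
for (int pos = 0; pos < numParents; pos++) {
  List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
  valueTableDescs.add(PlanUtils.getMapJoinValueTableDesc(
      PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue")));
}
conf.setValueTblDescs(valueTableDescs);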