Use of org.apache.hadoop.hive.ql.plan.TableDesc in the Apache Hive project.
From the class MapJoinTestConfig, method createMapJoinTableContainerSerDe:
public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoinDesc mapJoinDesc) throws SerDeException {
  final Byte smallTablePos = 1;
  // UNDONE: Why do we need to specify BinarySortableSerDe explicitly here???
  TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc();
  AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(BinarySortableSerDe.class, null);
  SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null);
  MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
  TableDesc valueTableDesc;
  if (mapJoinDesc.getNoOuterJoin()) {
    valueTableDesc = mapJoinDesc.getValueTblDescs().get(smallTablePos);
  } else {
    valueTableDesc = mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos);
  }
  AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueTableDesc.getDeserializerClass(), null);
  SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
  MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(mapJoinDesc, smallTablePos));
  MapJoinTableContainerSerDe mapJoinTableContainerSerDe = new MapJoinTableContainerSerDe(keyContext, valueContext);
  return mapJoinTableContainerSerDe;
}
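For orientation, here is a minimal sketch (not part of the original MapJoinTestConfig) of how a test might chain this method with createMapJoinDesc from the next snippet; the helper name buildContainerSerDe and the incoming MapJoinTestDescription instance are assumptions, and imports are elided as in the snippets above.

static MapJoinTableContainerSerDe buildContainerSerDe(MapJoinTestDescription testDesc) throws SerDeException {
  // createMapJoinDesc (shown below) fills in the keys, value table descriptors,
  // tag order, and join conditions for the two-table map join.
  MapJoinDesc mapJoinDesc = createMapJoinDesc(testDesc);
  // The returned serde pair describes how the small table's keys and values are
  // serialized into and read back out of the in-memory hash table container.
  return createMapJoinTableContainerSerDe(mapJoinDesc);
}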
Use of org.apache.hadoop.hive.ql.plan.TableDesc in the Apache Hive project.
From the class MapJoinTestConfig, method createMapJoinDesc:
public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) {
  MapJoinDesc mapJoinDesc = new MapJoinDesc();
  mapJoinDesc.setPosBigTable(0);
  List<ExprNodeDesc> keyExpr = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < testDesc.bigTableKeyColumnNums.length; i++) {
    keyExpr.add(new ExprNodeColumnDesc(testDesc.bigTableKeyTypeInfos[i], testDesc.bigTableKeyColumnNames[i], "B", false));
  }
  Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
  keyMap.put((byte) 0, keyExpr);
  List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) {
    smallTableExpr.add(new ExprNodeColumnDesc(testDesc.smallTableValueTypeInfos[i], testDesc.smallTableValueColumnNames[i], "S", false));
  }
  keyMap.put((byte) 1, smallTableExpr);
  mapJoinDesc.setKeys(keyMap);
  mapJoinDesc.setExprs(keyMap);
  Byte[] order = new Byte[] { (byte) 0, (byte) 1 };
  mapJoinDesc.setTagOrder(order);
  mapJoinDesc.setNoOuterJoin(testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER);
  Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
  // None.
  filterMap.put((byte) 0, new ArrayList<ExprNodeDesc>());
  mapJoinDesc.setFilters(filterMap);
  List<Integer> bigTableRetainColumnNumsList = intArrayToList(testDesc.bigTableRetainColumnNums);
  // For now, just small table values...
  List<Integer> smallTableRetainColumnNumsList = intArrayToList(testDesc.smallTableRetainValueColumnNums);
  Map<Byte, List<Integer>> retainListMap = new HashMap<Byte, List<Integer>>();
  retainListMap.put((byte) 0, bigTableRetainColumnNumsList);
  retainListMap.put((byte) 1, smallTableRetainColumnNumsList);
  mapJoinDesc.setRetainList(retainListMap);
  int joinDescType;
  switch (testDesc.vectorMapJoinVariation) {
    case INNER:
    case INNER_BIG_ONLY:
      joinDescType = JoinDesc.INNER_JOIN;
      break;
    case LEFT_SEMI:
      joinDescType = JoinDesc.LEFT_SEMI_JOIN;
      break;
    case OUTER:
      joinDescType = JoinDesc.LEFT_OUTER_JOIN;
      break;
    default:
      throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation);
  }
  JoinCondDesc[] conds = new JoinCondDesc[1];
  conds[0] = new JoinCondDesc(0, 1, joinDescType);
  mapJoinDesc.setConds(conds);
  TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(testDesc.hiveConf, PlanUtils.getFieldSchemasFromColumnList(keyExpr, ""));
  mapJoinDesc.setKeyTblDesc(keyTableDesc);
  TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(smallTableExpr, ""));
  ArrayList<TableDesc> valueTableDescsList = new ArrayList<TableDesc>();
  valueTableDescsList.add(null);
  valueTableDescsList.add(valueTableDesc);
  mapJoinDesc.setValueTblDescs(valueTableDescsList);
  mapJoinDesc.setValueFilteredTblDescs(valueTableDescsList);
  mapJoinDesc.setOutputColumnNames(Arrays.asList(testDesc.outputColumnNames));
  return mapJoinDesc;
}
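To make the byte-tag convention explicit, here is a hedged illustration (not from the original class) of how the descriptor built above is consumed by createMapJoinTableContainerSerDe in the first snippet: position 0 is the big table (matching setPosBigTable(0) and the tag order { 0, 1 }), and position 1 is the small table whose value TableDesc sits at index 1 of both value lists. The testDesc variable is assumed to exist.

MapJoinDesc desc = createMapJoinDesc(testDesc);
TableDesc keyTableDesc = desc.getKeyTblDesc();      // key layout from PlanUtils.getMapJoinKeyTableDesc above
TableDesc smallTableValueDesc = desc.getNoOuterJoin()
    ? desc.getValueTblDescs().get(1)                // inner / semi-join path
    : desc.getValueFilteredTblDescs().get(1);       // outer-join path
// Both lists were set to the same valueTableDescsList above (index 0 is null for
// the big table), so either branch resolves to the value TableDesc produced by
// PlanUtils.getMapJoinValueTableDesc.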
Use of org.apache.hadoop.hive.ql.plan.TableDesc in the Apache Hive project.
From the class TestInputOutputFormat, method createMockExecutionEnvironment:
/**
 * Create a mock execution environment that has enough detail that
 * ORC, vectorization, HiveInputFormat, and CombineHiveInputFormat don't
 * explode.
 * @param workDir a local filesystem work directory
 * @param warehouseDir a mock filesystem warehouse directory
 * @param tableName the table name
 * @param objectInspector object inspector for the row
 * @param isVectorized whether the job should run vectorized
 * @param partitions the number of partitions to create under the table
 * @return a JobConf that contains the necessary information
 * @throws IOException
 * @throws HiveException
 */
JobConf createMockExecutionEnvironment(Path workDir, Path warehouseDir, String tableName, ObjectInspector objectInspector, boolean isVectorized, int partitions) throws IOException, HiveException {
  JobConf conf = new JobConf();
  Utilities.clearWorkMap(conf);
  conf.set("hive.exec.plan", workDir.toString());
  conf.set("mapred.job.tracker", "local");
  String isVectorizedString = Boolean.toString(isVectorized);
  conf.set("hive.vectorized.execution.enabled", isVectorizedString);
  conf.set(Utilities.VECTOR_MODE, isVectorizedString);
  conf.set(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, isVectorizedString);
  conf.set("fs.mock.impl", MockFileSystem.class.getName());
  conf.set("mapred.mapper.class", ExecMapper.class.getName());
  Path root = new Path(warehouseDir, tableName);
  // clean out previous contents
  ((MockFileSystem) root.getFileSystem(conf)).clear();
  // build partition strings
  String[] partPath = new String[partitions];
  StringBuilder buffer = new StringBuilder();
  for (int p = 0; p < partitions; ++p) {
    partPath[p] = new Path(root, "p=" + p).toString();
    if (p != 0) {
      buffer.append(',');
    }
    buffer.append(partPath[p]);
  }
  conf.set("mapred.input.dir", buffer.toString());
  StringBuilder columnIds = new StringBuilder();
  StringBuilder columnNames = new StringBuilder();
  StringBuilder columnTypes = new StringBuilder();
  StructObjectInspector structOI = (StructObjectInspector) objectInspector;
  List<? extends StructField> fields = structOI.getAllStructFieldRefs();
  int numCols = fields.size();
  for (int i = 0; i < numCols; ++i) {
    if (i != 0) {
      columnIds.append(',');
      columnNames.append(',');
      columnTypes.append(',');
    }
    columnIds.append(i);
    columnNames.append(fields.get(i).getFieldName());
    columnTypes.append(fields.get(i).getFieldObjectInspector().getTypeName());
  }
  conf.set("hive.io.file.readcolumn.ids", columnIds.toString());
  conf.set("partition_columns", "p");
  conf.set(serdeConstants.LIST_COLUMNS, columnNames.toString());
  conf.set(serdeConstants.LIST_COLUMN_TYPES, columnTypes.toString());
  MockFileSystem fs = (MockFileSystem) warehouseDir.getFileSystem(conf);
  fs.clear();
  Properties tblProps = new Properties();
  tblProps.put("name", tableName);
  tblProps.put("serialization.lib", OrcSerde.class.getName());
  tblProps.put("columns", columnNames.toString());
  tblProps.put("columns.types", columnTypes.toString());
  TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, tblProps);
  MapWork mapWork = new MapWork();
  mapWork.setVectorMode(isVectorized);
  if (isVectorized) {
    VectorizedRowBatchCtx vectorizedRowBatchCtx = new VectorizedRowBatchCtx();
    vectorizedRowBatchCtx.init(structOI, new String[0]);
    mapWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
  }
  mapWork.setUseBucketizedHiveInputFormat(false);
  LinkedHashMap<Path, ArrayList<String>> aliasMap = new LinkedHashMap<>();
  ArrayList<String> aliases = new ArrayList<String>();
  aliases.add(tableName);
  LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
  for (int p = 0; p < partitions; ++p) {
    Path path = new Path(partPath[p]);
    aliasMap.put(path, aliases);
    LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
    PartitionDesc part = new PartitionDesc(tbl, partSpec);
    if (isVectorized) {
      part.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false));
    }
    partMap.put(path, part);
  }
  mapWork.setPathToAliases(aliasMap);
  mapWork.setPathToPartitionInfo(partMap);
  // write the plan out
  FileSystem localFs = FileSystem.getLocal(conf).getRaw();
  Path mapXml = new Path(workDir, "map.xml");
  localFs.delete(mapXml, true);
  FSDataOutputStream planStream = localFs.create(mapXml);
  SerializationUtilities.serializePlan(mapWork, planStream);
  conf.setBoolean(Utilities.HAS_MAP_WORK, true);
  planStream.close();
  return conf;
}
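Below is a hedged sketch of how a test inside TestInputOutputFormat might call this helper; the row class SketchRow, the warehouse path, the table name, and the split count are illustrative assumptions rather than values from the original test, and imports are elided as in the snippet above.

// Illustrative row type (assumption); a simple class with a couple of fields is
// enough for the reflection object inspector.
static class SketchRow {
  int x;
  long y;
}

void sketchUsage(Path workDir) throws Exception {
  ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
      SketchRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///warehouse"),
      "sketch_table", inspector, /* isVectorized */ false, /* partitions */ 2);
  // The returned conf carries the serialized MapWork plan plus the column, type,
  // and partition settings, so HiveInputFormat can plan the job. In a real test,
  // ORC files would be written into the mock partition directories before asking
  // for splits.
  InputFormat<?, ?> inputFormat = new HiveInputFormat();
  InputSplit[] splits = inputFormat.getSplits(conf, 1);
}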