Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class GenSparkSkewJoinProcessor, method processSkewJoin.
@SuppressWarnings("unchecked")
public static void processSkewJoin(JoinOperator joinOp, Task<?> currTask, ReduceWork reduceWork, ParseContext parseCtx) throws SemanticException {
  SparkWork currentWork = ((SparkTask) currTask).getWork();
  if (currentWork.getChildren(reduceWork).size() > 0) {
    LOG.warn("Skip runtime skew join as the ReduceWork has child work and hasn't been split.");
    return;
  }
  List<Task<?>> children = currTask.getChildTasks();
  Path baseTmpDir = parseCtx.getContext().getMRTmpPath();
  JoinDesc joinDescriptor = joinOp.getConf();
  Map<Byte, List<ExprNodeDesc>> joinValues = joinDescriptor.getExprs();
  int numAliases = joinValues.size();
  Map<Byte, Path> bigKeysDirMap = new HashMap<Byte, Path>();
  Map<Byte, Map<Byte, Path>> smallKeysDirMap = new HashMap<Byte, Map<Byte, Path>>();
  Map<Byte, Path> skewJoinJobResultsDir = new HashMap<Byte, Path>();
  Byte[] tags = joinDescriptor.getTagOrder();
  // for each joining table, set dir for big key and small keys properly
  for (int i = 0; i < numAliases; i++) {
    Byte alias = tags[i];
    bigKeysDirMap.put(alias, GenMRSkewJoinProcessor.getBigKeysDir(baseTmpDir, alias));
    Map<Byte, Path> smallKeysMap = new HashMap<Byte, Path>();
    smallKeysDirMap.put(alias, smallKeysMap);
    for (Byte src2 : tags) {
      if (!src2.equals(alias)) {
        smallKeysMap.put(src2, GenMRSkewJoinProcessor.getSmallKeysDir(baseTmpDir, alias, src2));
      }
    }
    skewJoinJobResultsDir.put(alias, GenMRSkewJoinProcessor.getBigKeysSkewJoinResultDir(baseTmpDir, alias));
  }
  joinDescriptor.setHandleSkewJoin(true);
  joinDescriptor.setBigKeysDirMap(bigKeysDirMap);
  joinDescriptor.setSmallKeysDirMap(smallKeysDirMap);
  joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVESKEWJOINKEY));
  // create proper table/column desc for spilled tables
  TableDesc keyTblDesc = (TableDesc) reduceWork.getKeyDesc().clone();
  List<String> joinKeys = Utilities.getColumnNames(keyTblDesc.getProperties());
  List<String> joinKeyTypes = Utilities.getColumnTypes(keyTblDesc.getProperties());
  Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
  Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
  Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
  Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
  // used for create mapJoinDesc, should be in order
  List<TableDesc> newJoinValueTblDesc = new ArrayList<TableDesc>();
  for (int i = 0; i < tags.length; i++) {
    newJoinValueTblDesc.add(null);
  }
  for (int i = 0; i < numAliases; i++) {
    Byte alias = tags[i];
    List<ExprNodeDesc> valueCols = joinValues.get(alias);
    String colNames = "";
    String colTypes = "";
    int columnSize = valueCols.size();
    List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
    List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
    ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();
    boolean first = true;
    for (int k = 0; k < columnSize; k++) {
      TypeInfo type = valueCols.get(k).getTypeInfo();
      // any name, it does not matter.
      String newColName = i + "_VALUE_" + k;
      ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
      columnInfos.add(columnInfo);
      newValueExpr.add(new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), false));
      if (!first) {
        colNames = colNames + ",";
        colTypes = colTypes + ",";
      }
      first = false;
      colNames = colNames + newColName;
      colTypes = colTypes + valueCols.get(k).getTypeString();
    }
    // we are putting join keys at last part of the spilled table
    for (int k = 0; k < joinKeys.size(); k++) {
      if (!first) {
        colNames = colNames + ",";
        colTypes = colTypes + ",";
      }
      first = false;
      colNames = colNames + joinKeys.get(k);
      colTypes = colTypes + joinKeyTypes.get(k);
      ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory.getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
      columnInfos.add(columnInfo);
      newKeyExpr.add(new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), false));
    }
    newJoinValues.put(alias, newValueExpr);
    newJoinKeys.put(alias, newKeyExpr);
    tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
    rowSchemaList.put(alias, new RowSchema(columnInfos));
    // construct value table Desc
    String valueColNames = "";
    String valueColTypes = "";
    first = true;
    for (int k = 0; k < columnSize; k++) {
      // any name, it does not matter.
      String newColName = i + "_VALUE_" + k;
      if (!first) {
        valueColNames = valueColNames + ",";
        valueColTypes = valueColTypes + ",";
      }
      valueColNames = valueColNames + newColName;
      valueColTypes = valueColTypes + valueCols.get(k).getTypeString();
      first = false;
    }
    newJoinValueTblDesc.set((byte) i, Utilities.getTableDesc(valueColNames, valueColTypes));
  }
  joinDescriptor.setSkewKeysValuesTables(tableDescList);
  joinDescriptor.setKeyTableDesc(keyTblDesc);
  // create N-1 map join tasks
  HashMap<Path, Task<?>> bigKeysDirToTaskMap = new HashMap<Path, Task<?>>();
  List<Serializable> listWorks = new ArrayList<Serializable>();
  List<Task<?>> listTasks = new ArrayList<Task<?>>();
  for (int i = 0; i < numAliases - 1; i++) {
    Byte src = tags[i];
    HiveConf hiveConf = new HiveConf(parseCtx.getConf(), GenSparkSkewJoinProcessor.class);
    SparkWork sparkWork = new SparkWork(parseCtx.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
    Task<?> skewJoinMapJoinTask = TaskFactory.get(sparkWork);
    skewJoinMapJoinTask.setFetchSource(currTask.isFetchSource());
    // create N TableScans
    Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
    for (int k = 0; k < tags.length; k++) {
      Operator<? extends OperatorDesc> ts = GenMapRedUtils.createTemporaryTableScanOperator(joinOp.getCompilationOpContext(), rowSchemaList.get((byte) k));
      ((TableScanOperator) ts).setTableDescSkewJoin(tableDescList.get((byte) k));
      parentOps[k] = ts;
    }
    // create the MapJoinOperator
    String dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
    MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc, joinDescriptor.getOutputColumnNames(), i, joinDescriptor.getConds(), joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, joinDescriptor.getMemoryMonitorInfo(), joinDescriptor.getInMemoryDataSize());
    mapJoinDescriptor.setTagOrder(tags);
    mapJoinDescriptor.setHandleSkewJoin(false);
    mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());
    mapJoinDescriptor.setColumnExprMap(joinDescriptor.getColumnExprMap());
    // temporarily, mark it as child of all the TS
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(joinOp.getCompilationOpContext(), mapJoinDescriptor, null, parentOps);
    // clone the original join operator, and replace it with the MJ
    // this makes sure MJ has the same downstream operator plan as the original join
    List<Operator<?>> reducerList = new ArrayList<Operator<?>>();
    reducerList.add(reduceWork.getReducer());
    Operator<? extends OperatorDesc> reducer = SerializationUtilities.cloneOperatorTree(reducerList).get(0);
    Preconditions.checkArgument(reducer instanceof JoinOperator, "Reducer should be join operator, but actually is " + reducer.getName());
    JoinOperator cloneJoinOp = (JoinOperator) reducer;
    List<Operator<? extends OperatorDesc>> childOps = cloneJoinOp.getChildOperators();
    for (Operator<? extends OperatorDesc> childOp : childOps) {
      childOp.replaceParent(cloneJoinOp, mapJoinOp);
    }
    mapJoinOp.setChildOperators(childOps);
    // set memory usage for the MJ operator
    setMemUsage(mapJoinOp, skewJoinMapJoinTask, parseCtx);
    // create N MapWorks and add them to the SparkWork
    MapWork bigMapWork = null;
    Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);
    for (int j = 0; j < tags.length; j++) {
      MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
      sparkWork.add(mapWork);
      // This code has been only added for testing
      boolean mapperCannotSpanPartns = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
      mapWork.setMapperCannotSpanPartns(mapperCannotSpanPartns);
      Operator<? extends OperatorDesc> tableScan = parentOps[j];
      String alias = tags[j].toString();
      ArrayList<String> aliases = new ArrayList<String>();
      aliases.add(alias);
      Path path;
      if (j == i) {
        path = bigKeysDirMap.get(tags[j]);
        bigKeysDirToTaskMap.put(path, skewJoinMapJoinTask);
        bigMapWork = mapWork;
      } else {
        path = smallTblDirs.get(tags[j]);
      }
      mapWork.addPathToAlias(path, aliases);
      mapWork.getAliasToWork().put(alias, tableScan);
      PartitionDesc partitionDesc = new PartitionDesc(tableDescList.get(tags[j]), null);
      mapWork.addPathToPartitionInfo(path, partitionDesc);
      mapWork.getAliasToPartnInfo().put(alias, partitionDesc);
      mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
    }
    // connect all small dir map work to the big dir map work
    Preconditions.checkArgument(bigMapWork != null, "Haven't identified big dir MapWork");
    // these 2 flags are intended only for the big-key map work
    bigMapWork.setNumMapTasks(HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVESKEWJOINMAPJOINNUMMAPTASK));
    bigMapWork.setMinSplitSize(HiveConf.getLongVar(hiveConf, HiveConf.ConfVars.HIVESKEWJOINMAPJOINMINSPLIT));
    // use HiveInputFormat so that we can control the number of map tasks
    bigMapWork.setInputformat(HiveInputFormat.class.getName());
    for (BaseWork work : sparkWork.getRoots()) {
      Preconditions.checkArgument(work instanceof MapWork, "All root work should be MapWork, but got " + work.getClass().getSimpleName());
      if (work != bigMapWork) {
        sparkWork.connect(work, bigMapWork, new SparkEdgeProperty(SparkEdgeProperty.SHUFFLE_NONE));
      }
    }
    // insert SparkHashTableSink and Dummy operators
    for (int j = 0; j < tags.length; j++) {
      if (j != i) {
        insertSHTS(tags[j], (TableScanOperator) parentOps[j], bigMapWork);
      }
    }
    listWorks.add(skewJoinMapJoinTask.getWork());
    listTasks.add(skewJoinMapJoinTask);
  }
  if (children != null) {
    for (Task<?> tsk : listTasks) {
      for (Task<?> oldChild : children) {
        tsk.addDependentTask(oldChild);
      }
    }
    currTask.setChildTasks(new ArrayList<Task<?>>());
    for (Task<?> oldChild : children) {
      oldChild.getParentTasks().remove(currTask);
    }
    listTasks.addAll(children);
    for (Task<?> oldChild : children) {
      listWorks.add(oldChild.getWork());
    }
  }
  ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx context = new ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx(bigKeysDirToTaskMap, children);
  ConditionalWork cndWork = new ConditionalWork(listWorks);
  ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork);
  cndTsk.setListTasks(listTasks);
  cndTsk.setResolver(new ConditionalResolverSkewJoin());
  cndTsk.setResolverCtx(context);
  currTask.setChildTasks(new ArrayList<Task<?>>());
  currTask.addDependentTask(cndTsk);
}
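The PartitionDesc wiring in the per-tag loop above reduces to a small pattern: one unpartitioned PartitionDesc per spill directory, registered under the same alias as the temporary table scan. A minimal sketch of just that step, assuming the spill TableDesc and temporary TableScanOperator have already been created; the helper name registerSpillDir is illustrative, not part of Hive.

import java.util.ArrayList;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Hypothetical helper: register one skew-join spill directory with a MapWork,
// mirroring what the per-tag loop above does for each joined table.
static void registerSpillDir(MapWork mapWork, Path spillDir, String alias,
    TableDesc spillTableDesc, Operator<? extends OperatorDesc> tableScan) {
  ArrayList<String> aliases = new ArrayList<String>();
  aliases.add(alias);
  // point the path at the alias, and the alias at the temporary table scan
  mapWork.addPathToAlias(spillDir, aliases);
  mapWork.getAliasToWork().put(alias, tableScan);
  // spilled rows carry no partition columns, so the partition spec is null
  PartitionDesc partitionDesc = new PartitionDesc(spillTableDesc, null);
  mapWork.addPathToPartitionInfo(spillDir, partitionDesc);
  mapWork.getAliasToPartnInfo().put(alias, partitionDesc);
}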
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class TestPlan, method testPlan.
@Test
public void testPlan() throws Exception {
  final String f1 = "#affiliations";
  final String f2 = "friends[0].friendid";
  try {
    // initialize a complete map reduce configuration
    ExprNodeDesc expr1 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, f1, "", false);
    ExprNodeDesc expr2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, f2, "", false);
    ExprNodeDesc filterExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("==", expr1, expr2);
    FilterDesc filterCtx = new FilterDesc(filterExpr, false);
    Operator<FilterDesc> op = OperatorFactory.get(new CompilationOpContext(), FilterDesc.class);
    op.setConf(filterCtx);
    ArrayList<String> aliasList = new ArrayList<String>();
    aliasList.add("a");
    Map<Path, List<String>> pa = new LinkedHashMap<>();
    pa.put(new Path("/tmp/testfolder"), aliasList);
    TableDesc tblDesc = Utilities.defaultTd;
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
    LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
    pt.put(new Path("/tmp/testfolder"), partDesc);
    LinkedHashMap<String, Operator<? extends OperatorDesc>> ao = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    ao.put("a", op);
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToAliases(pa);
    mrwork.getMapWork().setPathToPartitionInfo(pt);
    mrwork.getMapWork().setAliasToWork(ao);
    JobConf job = new JobConf(TestPlan.class);
    // serialize the configuration once ..
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    SerializationUtilities.serializePlan(mrwork, baos);
    baos.close();
    String v1 = baos.toString();
    // store into configuration
    job.set("fs.default.name", "file:///");
    Utilities.setMapRedWork(job, mrwork, new Path(System.getProperty("java.io.tmpdir") + File.separator + System.getProperty("user.name") + File.separator + "hive"));
    MapredWork mrwork2 = Utilities.getMapRedWork(job);
    Utilities.clearWork(job);
    // over here we should have some checks of the deserialized object against
    // the orginal object
    // System.out.println(v1);
    // serialize again
    baos.reset();
    SerializationUtilities.serializePlan(mrwork2, baos);
    baos.close();
    // verify that the two are equal
    assertEquals(v1, baos.toString());
  } catch (Exception excp) {
    excp.printStackTrace();
    throw excp;
  }
  System.out.println("Serialization/Deserialization of plan successful");
}
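Stripped of the filter operator and the JobConf plumbing, the PartitionDesc part of the round-trip above is short. A minimal sketch using only the calls already shown in the test; the method name and return value are illustrative additions.

import java.io.ByteArrayOutputStream;
import java.util.LinkedHashMap;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

// Attach a default-table PartitionDesc to one input path and serialize the plan,
// as the test does before comparing its two serialization passes for equality.
static String serializeMinimalPlan() throws Exception {
  MapredWork work = new MapredWork();
  LinkedHashMap<Path, PartitionDesc> pathToPart = new LinkedHashMap<>();
  pathToPart.put(new Path("/tmp/testfolder"), new PartitionDesc(Utilities.defaultTd, null));
  work.getMapWork().setPathToPartitionInfo(pathToPart);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  SerializationUtilities.serializePlan(work, baos);
  return baos.toString();
}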
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class TestUtilities, method testGetInputPathsWithMultipleThreadsAndEmptyPartitions.
/**
 * Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
 * can process two different tables that both have empty partitions when using multiple threads.
 * Some extra logic is placed at the end of the test to validate no race conditions put the
 * {@link MapWork} object in an invalid state.
 */
@Test
public void testGetInputPathsWithMultipleThreadsAndEmptyPartitions() throws Exception {
  int numPartitions = 15;
  JobConf jobConf = new JobConf();
  jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, Runtime.getRuntime().availableProcessors() * 2);
  MapWork mapWork = new MapWork();
  Path testTablePath = new Path("testTable");
  Path[] testPartitionsPaths = new Path[numPartitions];
  PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
  TableDesc mockTableDesc = mock(TableDesc.class);
  when(mockTableDesc.isNonNative()).thenReturn(false);
  when(mockTableDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
  doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();
  for (int i = 0; i < numPartitions; i++) {
    String testPartitionName = "p=" + i;
    testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
    mapWork.getPathToAliases().put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
    mapWork.getAliasToWork().put(testPartitionName, mock(Operator.class));
    mapWork.getPathToPartitionInfo().put(testPartitionsPaths[i], mockPartitionDesc);
  }
  FileSystem fs = FileSystem.getLocal(jobConf);
  try {
    fs.mkdirs(testTablePath);
    List<Path> inputPaths = Utilities.getInputPaths(jobConf, mapWork, new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR)), mock(Context.class), false);
    assertEquals(inputPaths.size(), numPartitions);
    for (int i = 0; i < numPartitions; i++) {
      assertNotEquals(inputPaths.get(i), testPartitionsPaths[i]);
    }
    assertEquals(mapWork.getPathToAliases().size(), numPartitions);
    assertEquals(mapWork.getPathToPartitionInfo().size(), numPartitions);
    assertEquals(mapWork.getAliasToWork().size(), numPartitions);
    for (Map.Entry<Path, List<String>> entry : mapWork.getPathToAliases().entrySet()) {
      assertNotNull(entry.getKey());
      assertNotNull(entry.getValue());
      assertEquals(entry.getValue().size(), 1);
      assertTrue(entry.getKey().getFileSystem(new Configuration()).exists(entry.getKey()));
    }
  } finally {
    if (fs.exists(testTablePath)) {
      fs.delete(testTablePath, true);
    }
  }
}
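For tests like this one, PartitionDesc needs no real metadata: a Mockito stub backed by a stubbed TableDesc is enough for the input-path listing code. A minimal sketch of just that setup, assuming Mockito and the Hive test classpath; the method name mockEmptyPartition is illustrative.

import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Properties;

import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Build a stubbed PartitionDesc whose table is native, has empty properties,
// and reports a sequence-file output format, as the test above requires.
static PartitionDesc mockEmptyPartition() {
  TableDesc tableDesc = mock(TableDesc.class);
  when(tableDesc.isNonNative()).thenReturn(false);
  when(tableDesc.getProperties()).thenReturn(new Properties());
  PartitionDesc partDesc = mock(PartitionDesc.class);
  when(partDesc.getProperties()).thenReturn(new Properties());
  when(partDesc.getTableDesc()).thenReturn(tableDesc);
  doReturn(HiveSequenceFileOutputFormat.class).when(partDesc).getOutputFileFormatClass();
  return partDesc;
}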
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class TestSerializationUtilities, method mockMapWorkWithSomePartitionDescProperties.
private static MapWork mockMapWorkWithSomePartitionDescProperties() throws Exception {
  String tableName = "test_table";
  int numPartitions = 2;
  Path root = new Path("/warehouse", "test_table");
  String[] partPath = new String[numPartitions];
  StringBuilder buffer = new StringBuilder();
  for (int p = 0; p < numPartitions; ++p) {
    partPath[p] = new Path(root, "p=" + p).toString();
    if (p != 0) {
      buffer.append(',');
    }
    buffer.append(partPath[p]);
  }
  Properties tblProps = new Properties();
  TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, tblProps);
  MapWork mapWork = new MapWork();
  Map<Path, List<String>> aliasMap = new LinkedHashMap<>();
  List<String> aliases = new ArrayList<String>();
  aliases.add(tableName);
  LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
  for (int p = 0; p < numPartitions; ++p) {
    Path path = new Path(partPath[p]);
    aliasMap.put(path, aliases);
    LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
    PartitionDesc part = new PartitionDesc(tbl, partSpec);
    part.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false, null));
    part.getProperties().put("impala_intermediate_stats_chunk1", "asdfghjk12345678");
    part.getProperties().put("impala_intermediate_stats_chunk2", "asdfghjk12345678");
    part.getProperties().put("rawDataSize", "10");
    part.getProperties().put("serialization.ddl", "asdf");
    partMap.put(path, part);
  }
  mapWork.setPathToAliases(aliasMap);
  mapWork.setPathToPartitionInfo(partMap);
  return mapWork;
}
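The point of interest for PartitionDesc here is that per-partition properties and the vectorization descriptor live on the PartitionDesc itself, not on the shared TableDesc. A minimal sketch of building one such partition, reusing only the calls and placeholder values from the method above; the method name orcPartitionWithStats is illustrative.

import java.util.LinkedHashMap;
import java.util.Properties;

import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc;

// One partition sharing the table's ORC descriptors but carrying its own
// properties and vectorized-input descriptor.
static PartitionDesc orcPartitionWithStats() {
  TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, new Properties());
  PartitionDesc part = new PartitionDesc(tbl, new LinkedHashMap<String, String>());
  part.setVectorPartitionDesc(
      VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false, null));
  part.getProperties().put("rawDataSize", "10");
  part.getProperties().put("serialization.ddl", "asdf");
  return part;
}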
Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache: class TestInputOutputFormat, method createMockExecutionEnvironment.
/**
 * Create a mock execution environment that has enough detail that
 * ORC, vectorization, HiveInputFormat, and CombineHiveInputFormat don't
 * explode.
 * @param workDir a local filesystem work directory
 * @param warehouseDir a mock filesystem warehouse directory
 * @param tableName the table name
 * @param objectInspector object inspector for the row
 * @param isVectorized should run vectorized
 * @return a JobConf that contains the necessary information
 * @throws IOException
 * @throws HiveException
 */
static JobConf createMockExecutionEnvironment(Path workDir, Path warehouseDir, String tableName, ObjectInspector objectInspector, boolean isVectorized, int partitions, String currFileSystemName) throws IOException, HiveException {
  JobConf conf = new JobConf();
  Utilities.clearWorkMap(conf);
  conf.set("hive.exec.plan", workDir.toString());
  conf.set("mapred.job.tracker", "local");
  String isVectorizedString = Boolean.toString(isVectorized);
  conf.set("hive.vectorized.execution.enabled", isVectorizedString);
  conf.set(Utilities.VECTOR_MODE, isVectorizedString);
  conf.set(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, isVectorizedString);
  conf.set("fs.mock.impl", currFileSystemName);
  conf.set("mapred.mapper.class", ExecMapper.class.getName());
  Path root = new Path(warehouseDir, tableName);
  // clean out previous contents
  ((MockFileSystem) root.getFileSystem(conf)).clear();
  // build partition strings
  String[] partPath = new String[partitions];
  StringBuilder buffer = new StringBuilder();
  for (int p = 0; p < partitions; ++p) {
    partPath[p] = new Path(root, "p=" + p).toString();
    if (p != 0) {
      buffer.append(',');
    }
    buffer.append(partPath[p]);
  }
  conf.set("mapred.input.dir", buffer.toString());
  StringBuilder columnIds = new StringBuilder();
  StringBuilder columnNames = new StringBuilder();
  StringBuilder columnTypes = new StringBuilder();
  StructObjectInspector structOI = (StructObjectInspector) objectInspector;
  List<? extends StructField> fields = structOI.getAllStructFieldRefs();
  int numCols = fields.size();
  for (int i = 0; i < numCols; ++i) {
    if (i != 0) {
      columnIds.append(',');
      columnNames.append(',');
      columnTypes.append(',');
    }
    columnIds.append(i);
    columnNames.append(fields.get(i).getFieldName());
    columnTypes.append(fields.get(i).getFieldObjectInspector().getTypeName());
  }
  conf.set("hive.io.file.readcolumn.ids", columnIds.toString());
  conf.set("partition_columns", "p");
  conf.set(serdeConstants.LIST_COLUMNS, columnNames.toString());
  conf.set(serdeConstants.LIST_COLUMN_TYPES, columnTypes.toString());
  MockFileSystem fs = (MockFileSystem) warehouseDir.getFileSystem(conf);
  fs.clear();
  Properties tblProps = new Properties();
  tblProps.put("name", tableName);
  tblProps.put("serialization.lib", OrcSerde.class.getName());
  tblProps.put("columns", columnNames.toString());
  tblProps.put("columns.types", columnTypes.toString());
  TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, tblProps);
  MapWork mapWork = new MapWork();
  mapWork.setVectorMode(isVectorized);
  if (isVectorized) {
    VectorizedRowBatchCtx vectorizedRowBatchCtx = new VectorizedRowBatchCtx();
    vectorizedRowBatchCtx.init(structOI, new String[0]);
    mapWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
  }
  mapWork.setUseBucketizedHiveInputFormat(false);
  Map<Path, List<String>> aliasMap = new LinkedHashMap<>();
  List<String> aliases = new ArrayList<String>();
  aliases.add(tableName);
  LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
  for (int p = 0; p < partitions; ++p) {
    Path path = new Path(partPath[p]);
    aliasMap.put(path, aliases);
    LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
    PartitionDesc part = new PartitionDesc(tbl, partSpec);
    if (isVectorized) {
      part.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false, null));
    }
    partMap.put(path, part);
  }
  mapWork.setPathToAliases(aliasMap);
  mapWork.setPathToPartitionInfo(partMap);
  // write the plan out
  FileSystem localFs = FileSystem.getLocal(conf).getRaw();
  Path mapXml = new Path(workDir, "map.xml");
  localFs.delete(mapXml, true);
  FSDataOutputStream planStream = localFs.create(mapXml);
  SerializationUtilities.serializePlan(mapWork, planStream);
  conf.setBoolean(Utilities.HAS_MAP_WORK, true);
  planStream.close();
  return conf;
}
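Within all of that setup, the PartitionDesc handling follows the same shape as the other examples on this page: one PartitionDesc per partition path, all sharing a single TableDesc, collected into the pathToPartitionInfo map. A condensed sketch of just that loop, assuming tbl, aliases, and partPath are built as in the method above; the helper name populatePartitions is illustrative.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc;

// Map every partition path to the shared alias list and to its own PartitionDesc,
// optionally attaching a vectorized-input descriptor, then install both maps.
static void populatePartitions(MapWork mapWork, TableDesc tbl, List<String> aliases,
    String[] partPath, boolean isVectorized) {
  Map<Path, List<String>> aliasMap = new LinkedHashMap<>();
  LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
  for (String dir : partPath) {
    Path path = new Path(dir);
    aliasMap.put(path, aliases);
    PartitionDesc part = new PartitionDesc(tbl, new LinkedHashMap<String, String>());
    if (isVectorized) {
      part.setVectorPartitionDesc(
          VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false, null));
    }
    partMap.put(path, part);
  }
  mapWork.setPathToAliases(aliasMap);
  mapWork.setPathToPartitionInfo(partMap);
}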