Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache: class VectorizedRowBatchCtx, method getPartitionValues.
public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, Configuration hiveConf,
    FileSplit split, Object[] partitionValues) throws IOException {
  // TODO: this is invalid for SMB. Keep this for now for legacy reasons. See the other overload.
  MapWork mapWork = Utilities.getMapWork(hiveConf);
  getPartitionValues(vrbCtx, mapWork, split, partitionValues);
}
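A minimal caller sketch for the overload above, assuming the Configuration already carries the serialized MapWork (so Utilities.getMapWork can recover it) and that the number of partition columns is known from the table schema; the wrapper class and method name are illustrative, not part of Hive:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.mapred.FileSplit;

public class PartitionValuesExample {

  // Hypothetical caller sketch (not from the Hive source): resolve the constant
  // partition values for the split about to be read.
  static Object[] resolvePartitionValues(VectorizedRowBatchCtx vrbCtx, Configuration conf,
      FileSplit split, int partitionColumnCount) throws IOException {
    Object[] partitionValues = new Object[partitionColumnCount];
    VectorizedRowBatchCtx.getPartitionValues(vrbCtx, conf, split, partitionValues);
    return partitionValues;
  }
}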
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache: class TestUtilities, method runTestGetInputPaths.
private void runTestGetInputPaths(JobConf jobConf, int numOfPartitions) throws Exception {
  MapWork mapWork = new MapWork();
  Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));

  LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();

  String testTableName = "testTable";
  Path testTablePath = new Path(testTableName);
  Path[] testPartitionsPaths = new Path[numOfPartitions];
  for (int i = 0; i < numOfPartitions; i++) {
    String testPartitionName = "p=" + i;
    testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
    pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
    mapWork.getAliasToWork().put(testPartitionName, (Operator<?>) mock(Operator.class));
  }
  mapWork.setPathToAliases(pathToAliasTable);

  FileSystem fs = FileSystem.getLocal(jobConf);
  try {
    fs.mkdirs(testTablePath);
    for (int i = 0; i < numOfPartitions; i++) {
      fs.mkdirs(testPartitionsPaths[i]);
      fs.create(new Path(testPartitionsPaths[i], "test1.txt")).close();
    }
    List<Path> inputPaths = Utilities.getInputPaths(jobConf, mapWork, scratchDir, mock(Context.class), false);
    assertEquals(inputPaths.size(), numOfPartitions);
    for (int i = 0; i < numOfPartitions; i++) {
      assertEquals(inputPaths.get(i), testPartitionsPaths[i]);
    }
  } finally {
    if (fs.exists(testTablePath)) {
      fs.delete(testTablePath, true);
    }
  }
}
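A hedged example of how such a private helper might be driven from an actual test method; the test name and partition count below are illustrative, not taken from TestUtilities:

@Test
public void testGetInputPathsWithMultiplePartitions() throws Exception {
  // Exercise the helper above against a small number of on-disk partitions.
  runTestGetInputPaths(new JobConf(), 5);
}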
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache: class TestUtilities, method testGetInputPathsWithMultipleThreadsAndEmptyPartitions.
/**
* Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
* can process two different tables that both have empty partitions when using multiple threads.
* Some extra logic is placed at the end of the test to validate no race conditions put the
* {@link MapWork} object in an invalid state.
*/
@Test
public void testGetInputPathsWithMultipleThreadsAndEmptyPartitions() throws Exception {
  int numPartitions = 15;
  JobConf jobConf = new JobConf();
  jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname,
      Runtime.getRuntime().availableProcessors() * 2);
  MapWork mapWork = new MapWork();
  Path testTablePath = new Path("testTable");
  Path[] testPartitionsPaths = new Path[numPartitions];

  PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
  TableDesc mockTableDesc = mock(TableDesc.class);

  when(mockTableDesc.isNonNative()).thenReturn(false);
  when(mockTableDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
  doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();

  for (int i = 0; i < numPartitions; i++) {
    String testPartitionName = "p=" + i;
    testPartitionsPaths[i] = new Path(testTablePath, "p=" + i);
    mapWork.getPathToAliases().put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
    mapWork.getAliasToWork().put(testPartitionName, (Operator<?>) mock(Operator.class));
    mapWork.getPathToPartitionInfo().put(testPartitionsPaths[i], mockPartitionDesc);
  }

  FileSystem fs = FileSystem.getLocal(jobConf);
  try {
    fs.mkdirs(testTablePath);
    List<Path> inputPaths = Utilities.getInputPaths(jobConf, mapWork,
        new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR)), mock(Context.class), false);
    assertEquals(inputPaths.size(), numPartitions);

    for (int i = 0; i < numPartitions; i++) {
      assertNotEquals(inputPaths.get(i), testPartitionsPaths[i]);
    }

    assertEquals(mapWork.getPathToAliases().size(), numPartitions);
    assertEquals(mapWork.getPathToPartitionInfo().size(), numPartitions);
    assertEquals(mapWork.getAliasToWork().size(), numPartitions);

    for (Map.Entry<Path, ArrayList<String>> entry : mapWork.getPathToAliases().entrySet()) {
      assertNotNull(entry.getKey());
      assertNotNull(entry.getValue());
      assertEquals(entry.getValue().size(), 1);
      assertTrue(entry.getKey().getFileSystem(new Configuration()).exists(entry.getKey()));
    }
  } finally {
    if (fs.exists(testTablePath)) {
      fs.delete(testTablePath, true);
    }
  }
}
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache: class MoveTask, method inferTaskInformation.
private void inferTaskInformation(TaskInformation ti) {
  // Walk up to the nearest ancestor that is some sort of map reduce task
  // (either standard, local, or a merge).
  while (ti.task.getParentTasks() != null && ti.task.getParentTasks().size() == 1) {
    ti.task = (Task) ti.task.getParentTasks().get(0);
    // If it was a merge task or a local map reduce task, nothing can be inferred.
    if (ti.task instanceof MergeFileTask || ti.task instanceof MapredLocalTask) {
      break;
    }
    // For a standard map reduce task, check what, if anything, it inferred about
    // the directory this move task is moving.
    if (ti.task instanceof MapRedTask) {
      MapredWork work = (MapredWork) ti.task.getWork();
      MapWork mapWork = work.getMapWork();
      ti.bucketCols = mapWork.getBucketedColsByDirectory().get(ti.path);
      ti.sortCols = mapWork.getSortedColsByDirectory().get(ti.path);
      if (work.getReduceWork() != null) {
        ti.numBuckets = work.getReduceWork().getNumReduceTasks();
      }
      if (ti.bucketCols != null || ti.sortCols != null) {
        // This must be the final map reduce task (the one containing the file sink
        // operator that writes the final output).
        assert work.isFinalMapRed();
      }
      break;
    }
    // A MoveTask ancestor can appear when a conditional merge was added but the
    // condition for merging is not met, see GenMRFileSink1. In that case keep walking,
    // but track the path the files were moved from.
    if (ti.task instanceof MoveTask) {
      MoveTask mt = (MoveTask) ti.task;
      if (mt.getWork().getLoadFileWork() != null) {
        ti.path = mt.getWork().getLoadFileWork().getSourcePath().toUri().toString();
      }
    }
  }
}
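For reference, a sketch of the mutable holder this method fills in, with field names taken from the usages above; the real MoveTask.TaskInformation may declare richer types for the bucket and sort columns:

import java.util.List;

import org.apache.hadoop.hive.ql.exec.Task;

// Illustrative sketch only; field names mirror the accesses in inferTaskInformation.
class TaskInformationSketch {
  Task<?> task;            // walked up the single-parent chain of the MoveTask
  String path;             // directory (as a URI string) this MoveTask is moving
  List<String> bucketCols; // bucketing columns recorded for that directory, if any
  List<String> sortCols;   // sort columns recorded for that directory, if any
  Integer numBuckets;      // reducer count when the plan has a ReduceWork
}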
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache: class LlapRecordReader, method findMapWork.
private static MapWork findMapWork(JobConf job) throws HiveException {
  String inputName = job.get(Utilities.INPUT_NAME, null);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Initializing for input " + inputName);
  }
  String prefixes = job.get(DagUtils.TEZ_MERGE_WORK_FILE_PREFIXES);
  if (prefixes != null && !StringUtils.isBlank(prefixes)) {
    // Merge-work prefixes indicate an SMB join, which is not supported here.
    // So, we don't use the below code that would get the correct MapWork. See HIVE-16985.
    return null;
  }
  BaseWork work = null;
  // HIVE-16985: try to find the fake merge work for SMB join, that is really another MapWork.
  if (inputName != null) {
    if (prefixes == null || !Lists.newArrayList(prefixes.split(",")).contains(inputName)) {
      inputName = null; // Not one of the merged inputs; take the default path.
    }
  }
  if (inputName != null) {
    work = Utilities.getMergeWork(job, inputName);
  }
  if (work == null || !(work instanceof MapWork)) {
    work = Utilities.getMapWork(job);
  }
  return (MapWork) work;
}
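The prefix check above decides whether inputName refers to one of the merged (SMB) inputs. A small, self-contained sketch of that decision, using only the string handling shown above; the class and method names are illustrative, not from Hive:

import java.util.Arrays;

public class MergePrefixCheck {

  // Returns true when the named input appears in the comma-separated merge-work
  // prefix list, mirroring the check in findMapWork above.
  static boolean isMergeInput(String inputName, String prefixes) {
    if (inputName == null || prefixes == null || prefixes.trim().isEmpty()) {
      return false;
    }
    return Arrays.asList(prefixes.split(",")).contains(inputName);
  }

  public static void main(String[] args) {
    System.out.println(isMergeInput("Map 1", "Map 1,Map 3")); // true
    System.out.println(isMergeInput("Map 2", "Map 1,Map 3")); // false
  }
}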