Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
In the class TestUtilities, the method runTestGetInputSummary:
private ContentSummary runTestGetInputSummary(JobConf jobConf, Properties properties, int numOfPartitions,
    int bytesPerFile, Class<? extends InputFormat> inputFormatClass) throws IOException {
  // creates scratch directories needed by the Context object
  SessionState.start(new HiveConf());
  MapWork mapWork = new MapWork();
  Context context = new Context(jobConf);
  LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
  LinkedHashMap<Path, ArrayList<String>> pathToAliasTable = new LinkedHashMap<>();
  TableScanOperator scanOp = new TableScanOperator();
  PartitionDesc partitionDesc = new PartitionDesc(new TableDesc(inputFormatClass, null, properties), null);
  String testTableName = "testTable";
  Path testTablePath = new Path(testTableName);
  Path[] testPartitionsPaths = new Path[numOfPartitions];
  for (int i = 0; i < numOfPartitions; i++) {
    String testPartitionName = "p=" + i;
    testPartitionsPaths[i] = new Path(testTablePath, testPartitionName);
    pathToPartitionInfo.put(testPartitionsPaths[i], partitionDesc);
    pathToAliasTable.put(testPartitionsPaths[i], Lists.newArrayList(testPartitionName));
    mapWork.getAliasToWork().put(testPartitionName, scanOp);
  }
  mapWork.setPathToAliases(pathToAliasTable);
  mapWork.setPathToPartitionInfo(pathToPartitionInfo);
  FileSystem fs = FileSystem.getLocal(jobConf);
  try {
    fs.mkdirs(testTablePath);
    byte[] data = new byte[bytesPerFile];
    // write one file of bytesPerFile bytes into every partition directory
    for (int i = 0; i < numOfPartitions; i++) {
      fs.mkdirs(testPartitionsPaths[i]);
      FSDataOutputStream out = fs.create(new Path(testPartitionsPaths[i], "test1.txt"));
      out.write(data);
      out.close();
    }
    return Utilities.getInputSummary(context, mapWork, null);
  } finally {
    if (fs.exists(testTablePath)) {
      fs.delete(testTablePath, true);
    }
  }
}
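
For context, a test in the same class would exercise this helper roughly as follows. This is a hedged sketch, not code from the Hive repository: the test name, the partition and byte counts, and the choice of org.apache.hadoop.mapred.TextInputFormat are illustrative; the expected total simply follows from numOfPartitions files of bytesPerFile bytes each.

@Test
public void testGetInputSummaryPlainText() throws IOException {
  // 5 partitions, one 300-byte file per partition (values are illustrative)
  ContentSummary summary = runTestGetInputSummary(new JobConf(), new Properties(), 5, 300, TextInputFormat.class);
  // total input length should be the sum of all partition files
  assertEquals(5 * 300, summary.getLength());
}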
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
In the class TestUtilities, the method testGetInputPathsWithEmptyTables:
/**
* Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
* can process two different empty tables without throwing any exceptions.
*/
@Test
public void testGetInputPathsWithEmptyTables() throws Exception {
  String alias1Name = "alias1";
  String alias2Name = "alias2";
  MapWork mapWork1 = new MapWork();
  MapWork mapWork2 = new MapWork();
  JobConf jobConf = new JobConf();
  Path nonExistentPath1 = new Path(UUID.randomUUID().toString());
  Path nonExistentPath2 = new Path(UUID.randomUUID().toString());
  PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
  TableDesc mockTableDesc = mock(TableDesc.class);
  when(mockTableDesc.isNonNative()).thenReturn(false);
  when(mockTableDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
  doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();
  mapWork1.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, Lists.newArrayList(alias1Name))));
  mapWork1.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias1Name, (Operator<?>) mock(Operator.class))));
  mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
  mapWork2.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, Lists.newArrayList(alias2Name))));
  mapWork2.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias2Name, (Operator<?>) mock(Operator.class))));
  mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
  List<Path> inputPaths = new ArrayList<>();
  try {
    Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
    inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork1, scratchDir, mock(Context.class), false));
    inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork2, scratchDir, mock(Context.class), false));
    // one input path is expected per empty table
    assertEquals(2, inputPaths.size());
  } finally {
    // getInputPaths materializes a placeholder file for each empty table under the scratch dir; clean them up
    File file;
    for (Path path : inputPaths) {
      file = new File(path.toString());
      if (file.exists()) {
        file.delete();
      }
    }
  }
}
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
In the class TestDynamicPartitionPruner, the method testSingleSourceMultipleFiltersOrdering2:
@Test(timeout = 5000)
public void testSingleSourceMultipleFiltersOrdering2() throws InterruptedException, SerDeException {
  InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
  doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
  MapWork mapWork = createMockMapWork(new TestSource("v1", 2));
  DynamicPartitionPruner pruner = new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);
  PruneRunnable pruneRunnable = new PruneRunnable(pruner);
  Thread t = new Thread(pruneRunnable);
  t.start();
  try {
    pruneRunnable.start();
    InputInitializerEvent event = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    event.setSourceVertexName("v1");
    pruner.processVertex("v1");
    // 2 tasks X 2 filter expressions for v1 -> 4 events
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruner.addEvent(event);
    pruneRunnable.awaitEnd();
    assertFalse(pruneRunnable.inError.get());
  } finally {
    t.interrupt();
    t.join();
  }
}
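
Both pruner tests drive the pruner through a PruneRunnable helper that this listing does not include. Below is a minimal sketch consistent with how it is used above: start() releases the worker thread, awaitEnd() blocks until pruning finishes, and inError records failures. The real nested class in TestDynamicPartitionPruner may differ, and the call to the pruner's blocking prune() entry point is an assumption.

// Hypothetical reconstruction; uses java.util.concurrent.CountDownLatch and AtomicBoolean.
static class PruneRunnable implements Runnable {
  final AtomicBoolean inError = new AtomicBoolean(false);
  private final DynamicPartitionPruner pruner;
  private final CountDownLatch startLatch = new CountDownLatch(1);
  private final CountDownLatch endLatch = new CountDownLatch(1);

  PruneRunnable(DynamicPartitionPruner pruner) {
    this.pruner = pruner;
  }

  void start() {
    startLatch.countDown(); // let run() proceed
  }

  void awaitEnd() throws InterruptedException {
    endLatch.await(); // block the test until pruning has completed
  }

  @Override
  public void run() {
    try {
      startLatch.await();
      pruner.prune(); // assumed blocking call that returns once all expected events are processed
    } catch (Exception e) {
      inError.set(true);
    } finally {
      endLatch.countDown();
    }
  }
}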
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
In the class TestDynamicPartitionPruner, the method testMultipleSourcesOrdering2:
@Test(timeout = 5000)
public void testMultipleSourcesOrdering2() throws InterruptedException, SerDeException {
  InputInitializerContext mockInitContext = mock(InputInitializerContext.class);
  doReturn(2).when(mockInitContext).getVertexNumTasks("v1");
  doReturn(3).when(mockInitContext).getVertexNumTasks("v2");
  MapWork mapWork = createMockMapWork(new TestSource("v1", 2), new TestSource("v2", 1));
  DynamicPartitionPruner pruner = new DynamicPartitionPrunerForEventTesting(mockInitContext, mapWork);
  PruneRunnable pruneRunnable = new PruneRunnable(pruner);
  Thread t = new Thread(pruneRunnable);
  t.start();
  try {
    pruneRunnable.start();
    InputInitializerEvent eventV1 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    eventV1.setSourceVertexName("v1");
    InputInitializerEvent eventV2 = InputInitializerEvent.create("FakeTarget", "TargetInput", ByteBuffer.allocate(0));
    eventV2.setSourceVertexName("v2");
    // 2 X 2 events for V1. 3 X 1 events for V2
    pruner.processVertex("v1");
    pruner.processVertex("v2");
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV1);
    pruner.addEvent(eventV2);
    pruner.addEvent(eventV2);
    pruner.addEvent(eventV2);
    pruneRunnable.awaitEnd();
    assertFalse(pruneRunnable.inError.get());
  } finally {
    t.interrupt();
    t.join();
  }
}
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
In the class TestDynamicPartitionPruner, the method createMockMapWork:
// Builds a MapWork mock whose event-source maps register one table desc, column name, column type
// and partition-key expression per TestSource expression, keyed by source vertex name.
private MapWork createMockMapWork(TestSource... testSources) {
  MapWork mapWork = mock(MapWork.class);
  Map<String, List<TableDesc>> tableMap = new HashMap<>();
  Map<String, List<String>> columnMap = new HashMap<>();
  Map<String, List<String>> typeMap = new HashMap<>();
  Map<String, List<ExprNodeDesc>> exprMap = new HashMap<>();
  int count = 0;
  for (TestSource testSource : testSources) {
    for (int i = 0; i < testSource.numExpressions; i++) {
      List<TableDesc> tableDescList = tableMap.get(testSource.vertexName);
      if (tableDescList == null) {
        tableDescList = new LinkedList<>();
        tableMap.put(testSource.vertexName, tableDescList);
      }
      tableDescList.add(mock(TableDesc.class));
      List<String> columnList = columnMap.get(testSource.vertexName);
      if (columnList == null) {
        columnList = new LinkedList<>();
        columnMap.put(testSource.vertexName, columnList);
      }
      columnList.add(testSource.vertexName + "c_" + count + "_" + i);
      List<String> typeList = typeMap.get(testSource.vertexName);
      if (typeList == null) {
        typeList = new LinkedList<>();
        typeMap.put(testSource.vertexName, typeList);
      }
      typeList.add("string");
      List<ExprNodeDesc> exprNodeDescList = exprMap.get(testSource.vertexName);
      if (exprNodeDescList == null) {
        exprNodeDescList = new LinkedList<>();
        exprMap.put(testSource.vertexName, exprNodeDescList);
      }
      exprNodeDescList.add(mock(ExprNodeDesc.class));
    }
    count++;
  }
  doReturn(tableMap).when(mapWork).getEventSourceTableDescMap();
  doReturn(columnMap).when(mapWork).getEventSourceColumnNameMap();
  doReturn(exprMap).when(mapWork).getEventSourcePartKeyExprMap();
  doReturn(typeMap).when(mapWork).getEventSourceColumnTypeMap();
  return mapWork;
}
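
createMockMapWork only reads two fields from TestSource, which is likewise not shown in this listing. The sketch below is consistent with its use above (new TestSource("v1", 2) plus the vertexName and numExpressions reads); the actual nested class may carry more state.

// Hypothetical reconstruction of the TestSource value object used above.
private static class TestSource {
  final String vertexName; // source vertex that emits the pruning events
  final int numExpressions; // number of partition-pruning expressions registered for that vertex

  TestSource(String vertexName, int numExpressions) {
    this.vertexName = vertexName;
    this.numExpressions = numExpressions;
  }
}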