Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class TestGridmixMemoryEmulation, method testJavaHeapOptionsDisabled.
/**
 * Test disabled task heap options configuration in {@link GridmixJob}.
 */
@Test
@SuppressWarnings("deprecation")
public void testJavaHeapOptionsDisabled() throws Exception {
  Configuration gridmixConf = new Configuration();
  gridmixConf.setBoolean(GridmixJob.GRIDMIX_TASK_JVM_OPTIONS_ENABLE, false);

  // set the default values of simulated job
  gridmixConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx1m");
  gridmixConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx2m");
  gridmixConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx3m");

  // set the default map and reduce task options for original job
  final JobConf originalConf = new JobConf();
  originalConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx10m");
  originalConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx20m");
  originalConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx30m");

  // define a mock job
  MockJob story = new MockJob(originalConf) {
    public JobConf getJobConf() {
      return originalConf;
    }
  };

  GridmixJob job = new DummyGridmixJob(gridmixConf, story);
  Job simulatedJob = job.getJob();
  Configuration simulatedConf = simulatedJob.getConfiguration();

  assertEquals("Map heap options works when disabled!", "-Xmx1m",
      simulatedConf.get(MRJobConfig.MAP_JAVA_OPTS));
  assertEquals("Reduce heap options works when disabled!", "-Xmx2m",
      simulatedConf.get(MRJobConfig.REDUCE_JAVA_OPTS));
  assertEquals("Task heap options works when disabled!", "-Xmx3m",
      simulatedConf.get(JobConf.MAPRED_TASK_JAVA_OPTS));
}
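The three keys exercised above are ordinary configuration strings, so their relationship can be seen without the Gridmix harness. A minimal sketch (the class name is illustrative, not from the test) that sets and reads the per-phase options and the deprecated global key on a JobConf; MapReduce falls back to the deprecated mapred.child.java.opts only when the per-phase key is unset:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class HeapOptsSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    conf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx512m");      // mapreduce.map.java.opts
    conf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx1024m");  // mapreduce.reduce.java.opts
    conf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx256m");  // deprecated mapred.child.java.opts
    System.out.println(conf.get(MRJobConfig.MAP_JAVA_OPTS));
    System.out.println(conf.get(MRJobConfig.REDUCE_JAVA_OPTS));
    System.out.println(conf.get(JobConf.MAPRED_TASK_JAVA_OPTS));
  }
}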
Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class TestUtilities, method testGetInputSummaryWithContentSummaryInputFormat.
@Test
public void testGetInputSummaryWithContentSummaryInputFormat() throws IOException {
  final int NUM_PARTITIONS = 5;
  final int BYTES_PER_FILE = 10;

  JobConf jobConf = new JobConf();
  Properties properties = new Properties();

  jobConf.setInt(Utilities.DEPRECATED_MAPRED_DFSCLIENT_PARALLELISM_MAX, 2);
  ContentSummaryInputFormatTestClass.setContentSummary(
      new ContentSummary.Builder().length(BYTES_PER_FILE).fileCount(2).directoryCount(1).build());

  /* Write twice as many bytes to the files as the injected summary reports, to verify that
     ContentSummaryInputFormat is actually consulted and the sizes come from it rather than
     from the filesystem. */
  ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS,
      BYTES_PER_FILE * 2, ContentSummaryInputFormatTestClass.class);
  assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength());
  assertEquals(NUM_PARTITIONS * 2, summary.getFileCount());
  assertEquals(NUM_PARTITIONS, summary.getDirectoryCount());
}
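The injected summary is built with the standard builder from org.apache.hadoop.fs.ContentSummary. A minimal sketch, independent of the test harness (class name is illustrative), showing the builder and the getters the assertions rely on:

import org.apache.hadoop.fs.ContentSummary;

public class ContentSummarySketch {
  public static void main(String[] args) {
    ContentSummary cs = new ContentSummary.Builder()
        .length(10)          // total bytes reported
        .fileCount(2)        // number of files
        .directoryCount(1)   // number of directories
        .build();
    System.out.println(cs.getLength());          // 10
    System.out.println(cs.getFileCount());       // 2
    System.out.println(cs.getDirectoryCount());  // 1
  }
}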
Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class TestUtilities, method testGetInputPathsWithEmptyTables.
/**
 * Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
 * can process two different empty tables without throwing any exceptions.
 */
@Test
public void testGetInputPathsWithEmptyTables() throws Exception {
  String alias1Name = "alias1";
  String alias2Name = "alias2";

  MapWork mapWork1 = new MapWork();
  MapWork mapWork2 = new MapWork();
  JobConf jobConf = new JobConf();

  Path nonExistentPath1 = new Path(UUID.randomUUID().toString());
  Path nonExistentPath2 = new Path(UUID.randomUUID().toString());

  PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
  TableDesc mockTableDesc = mock(TableDesc.class);

  when(mockTableDesc.isNonNative()).thenReturn(false);
  when(mockTableDesc.getProperties()).thenReturn(new Properties());

  when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
  when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
  doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();

  mapWork1.setPathToAliases(new LinkedHashMap<>(
      ImmutableMap.of(nonExistentPath1, Lists.newArrayList(alias1Name))));
  mapWork1.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(
      ImmutableMap.of(alias1Name, (Operator<?>) mock(Operator.class))));
  mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(
      ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));

  mapWork2.setPathToAliases(new LinkedHashMap<>(
      ImmutableMap.of(nonExistentPath2, Lists.newArrayList(alias2Name))));
  mapWork2.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(
      ImmutableMap.of(alias2Name, (Operator<?>) mock(Operator.class))));
  mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(
      ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));

  List<Path> inputPaths = new ArrayList<>();
  try {
    Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
    inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork1, scratchDir, mock(Context.class), false));
    inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork2, scratchDir, mock(Context.class), false));
    assertEquals(2, inputPaths.size());
  } finally {
    File file;
    for (Path path : inputPaths) {
      file = new File(path.toString());
      if (file.exists()) {
        file.delete();
      }
    }
  }
}
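The scratch directory in the try block is read directly off the JobConf: JobConf extends Configuration, so HiveConf.getVar accepts it and returns either the value set on it or the ConfVars default. A minimal sketch of just that lookup, assuming the Hive libraries are on the classpath (class name is illustrative):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.mapred.JobConf;

public class ScratchDirSketch {
  public static void main(String[] args) {
    JobConf jobConf = new JobConf();
    // JobConf is a Configuration, so Hive settings can be resolved from it directly.
    String localScratch = HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR);
    Path scratchDir = new Path(localScratch);
    System.out.println(scratchDir);
  }
}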
Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class TestUtilities, method testGetInputSummaryWithMultipleThreads.
@Test
public void testGetInputSummaryWithMultipleThreads() throws IOException {
  final int NUM_PARTITIONS = 5;
  final int BYTES_PER_FILE = 5;

  JobConf jobConf = new JobConf();
  Properties properties = new Properties();

  jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 2);
  ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS,
      BYTES_PER_FILE, HiveInputFormat.class);
  assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength());
  assertEquals(NUM_PARTITIONS, summary.getFileCount());
  assertEquals(NUM_PARTITIONS, summary.getDirectoryCount());

  // Test deprecated mapred.dfsclient.parallelism.max
  jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 0);
  jobConf.setInt(Utilities.DEPRECATED_MAPRED_DFSCLIENT_PARALLELISM_MAX, 2);
  summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS, BYTES_PER_FILE,
      HiveInputFormat.class);
  assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength());
  assertEquals(NUM_PARTITIONS, summary.getFileCount());
  assertEquals(NUM_PARTITIONS, summary.getDirectoryCount());
}
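Both knobs exercised here are plain int settings on the JobConf: the current hive.exec.input.listing.max.threads key and the deprecated mapred.dfsclient.parallelism.max key, which the test above shows is still honored when the Hive key is set to 0. A minimal sketch of setting the two keys outside the test harness (class name is illustrative):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.mapred.JobConf;

public class ListingThreadsSketch {
  public static void main(String[] args) {
    JobConf jobConf = new JobConf();
    // Preferred key: hive.exec.input.listing.max.threads
    jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 2);
    // Deprecated fallback key, still read when the Hive key is 0/unset
    jobConf.setInt(Utilities.DEPRECATED_MAPRED_DFSCLIENT_PARALLELISM_MAX, 2);
    System.out.println(jobConf.getInt(
        HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 0));
  }
}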
Use of org.apache.hadoop.mapred.JobConf in project hive by apache.
The class GenericUDTFGetSplits, method getSplits.
public InputSplit[] getSplits(JobConf job, int numSplits, TezWork work, Schema schema)
    throws IOException {
  DAG dag = DAG.create(work.getName());
  dag.setCredentials(job.getCredentials());

  DagUtils utils = DagUtils.getInstance();
  Context ctx = new Context(job);
  MapWork mapWork = (MapWork) work.getAllWork().get(0);
  // bunch of things get setup in the context based on conf but we need only the MR tmp directory
  // for the following method.
  JobConf wxConf = utils.initializeVertexConf(job, ctx, mapWork);
  // TODO: should we also whitelist input formats here? from mapred.input.format.class
  Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), job);
  FileSystem fs = scratchDir.getFileSystem(job);
  try {
    LocalResource appJarLr = createJarLocalResource(utils.getExecJarPathLocal(), utils, job);
    Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, appJarLr,
        new ArrayList<LocalResource>(), fs, ctx, false, work, work.getVertexType(mapWork));
    String vertexName = wx.getName();
    dag.addVertex(wx);
    utils.addCredentials(mapWork, dag);

    // we have the dag now proceed to get the splits:
    Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
    Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
    HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork);
    List<Event> eventList = splitGenerator.initialize();

    InputSplit[] result = new InputSplit[eventList.size() - 1];

    InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) eventList.get(0);
    List<TaskLocationHint> hints = configureEvent.getLocationHint().getTaskLocationHints();
    Preconditions.checkState(hints.size() == eventList.size() - 1);

    if (LOG.isDebugEnabled()) {
      LOG.debug("NumEvents=" + eventList.size() + ", NumSplits=" + result.length);
    }

    LlapCoordinator coordinator = LlapCoordinator.getInstance();
    if (coordinator == null) {
      throw new IOException("LLAP coordinator is not initialized; must be running in HS2 with "
          + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
    }

    // See the discussion in the implementation as to why we generate app ID.
    ApplicationId applicationId = coordinator.createExtClientAppId();

    // This assumes LLAP cluster owner is always the HS2 user.
    String llapUser = UserGroupInformation.getLoginUser().getShortUserName();

    String queryUser = null;
    byte[] tokenBytes = null;
    LlapSigner signer = null;
    if (UserGroupInformation.isSecurityEnabled()) {
      signer = coordinator.getLlapSigner(job);

      // 1. Generate the token for query user (applies to all splits).
      queryUser = SessionState.getUserFromAuthenticator();
      if (queryUser == null) {
        queryUser = UserGroupInformation.getCurrentUser().getUserName();
        LOG.warn("Cannot determine the session user; using " + queryUser + " instead");
      }
      LlapTokenLocalClient tokenClient = coordinator.getLocalTokenClient(job, llapUser);
      // We put the query user, not LLAP user, into the message and token.
      Token<LlapTokenIdentifier> token = tokenClient.createToken(applicationId.toString(), queryUser, true);
      LOG.info("Created the token for remote user: {}", token);
      // bos/dos are output-stream fields of the enclosing class used to serialize the token.
      bos.reset();
      token.write(dos);
      tokenBytes = bos.toByteArray();
    } else {
      queryUser = UserGroupInformation.getCurrentUser().getUserName();
    }

    LOG.info("Number of splits: " + (eventList.size() - 1));
    SignedMessage signedSvs = null;
    for (int i = 0; i < eventList.size() - 1; i++) {
      TaskSpec taskSpec = new TaskSpecBuilder().constructTaskSpec(dag, vertexName,
          eventList.size() - 1, applicationId, i);

      // 2. Generate the vertex/submit information for all events.
      if (i == 0) {
        // The queryId could either be picked up from the current request being processed, or
        // generated. The current request isn't exactly correct since the query is 'done' once we
        // return the results. Generating a new one has the added benefit of working once this
        // is moved out of a UDTF into a proper API.
        // Setting this to the generated AppId which is unique.
        // Despite the differences in TaskSpec, the vertex spec should be the same.
        signedSvs = createSignedVertexSpec(signer, taskSpec, applicationId, queryUser,
            applicationId.toString());
      }

      SubmitWorkInfo submitWorkInfo = new SubmitWorkInfo(applicationId, System.currentTimeMillis(),
          taskSpec.getVertexParallelism(), signedSvs.message, signedSvs.signature);
      byte[] submitWorkBytes = SubmitWorkInfo.toBytes(submitWorkInfo);

      // 3. Generate input event.
      SignedMessage eventBytes = makeEventBytes(wx, vertexName, eventList.get(i + 1), signer);

      // 4. Make location hints.
      SplitLocationInfo[] locations = makeLocationHints(hints.get(i));

      result[i] = new LlapInputSplit(i, submitWorkBytes, eventBytes.message, eventBytes.signature,
          locations, schema, llapUser, tokenBytes);
    }
    return result;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
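Of the many moving parts above, the JobConf-specific piece is small: the job's credentials are copied onto the Tez DAG, and the same JobConf feeds the vertex-conf and scratch-dir helpers. A minimal sketch of just the credential hand-off, assuming the Tez API is on the classpath (class and DAG names are illustrative):

import org.apache.hadoop.mapred.JobConf;
import org.apache.tez.dag.api.DAG;

public class DagCredentialsSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Delegation tokens collected on the JobConf travel with the DAG,
    // so downstream tasks can authenticate with the same credentials.
    DAG dag = DAG.create("get_splits_example");
    dag.setCredentials(job.getCredentials());
    System.out.println(dag.getName());
  }
}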