
Example 41 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

From the class TestGridmixMemoryEmulation, method testJavaHeapOptionsDisabled.

/**
   * Test disabled task heap options configuration in {@link GridmixJob}.
   */
@Test
@SuppressWarnings("deprecation")
public void testJavaHeapOptionsDisabled() throws Exception {
    Configuration gridmixConf = new Configuration();
    gridmixConf.setBoolean(GridmixJob.GRIDMIX_TASK_JVM_OPTIONS_ENABLE, false);
    // set the default values of simulated job
    gridmixConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx1m");
    gridmixConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx2m");
    gridmixConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx3m");
    // set the default map and reduce task options for original job
    final JobConf originalConf = new JobConf();
    originalConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx10m");
    originalConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx20m");
    originalConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx30m");
    // define a mock job
    MockJob story = new MockJob(originalConf) {

        public JobConf getJobConf() {
            return originalConf;
        }
    };
    GridmixJob job = new DummyGridmixJob(gridmixConf, story);
    Job simulatedJob = job.getJob();
    Configuration simulatedConf = simulatedJob.getConfiguration();
    assertEquals("Map heap options works when disabled!", "-Xmx1m", simulatedConf.get(MRJobConfig.MAP_JAVA_OPTS));
    assertEquals("Reduce heap options works when disabled!", "-Xmx2m", simulatedConf.get(MRJobConfig.REDUCE_JAVA_OPTS));
    assertEquals("Task heap options works when disabled!", "-Xmx3m", simulatedConf.get(JobConf.MAPRED_TASK_JAVA_OPTS));
}
Also used: DummyGridmixJob(org.apache.hadoop.mapred.gridmix.TestHighRamJob.DummyGridmixJob), Configuration(org.apache.hadoop.conf.Configuration), Job(org.apache.hadoop.mapreduce.Job), MockJob(org.apache.hadoop.mapred.gridmix.DebugJobProducer.MockJob), JobConf(org.apache.hadoop.mapred.JobConf), Test(org.junit.Test)
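
For context, the three heap-option keys the test toggles are ordinary string properties on the job configuration. Below is a minimal, standalone sketch (the class name HeapOptionKeysSketch is made up for illustration; it is not part of the Gridmix test harness) that assumes only the Hadoop MapReduce client libraries on the classpath:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class HeapOptionKeysSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Per-phase heap options; these take precedence over the older catch-all key below.
        conf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx1m");
        conf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx2m");
        // Deprecated mapred.child.java.opts, still honored as a fallback.
        conf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx3m");
        System.out.println("map    = " + conf.get(MRJobConfig.MAP_JAVA_OPTS));
        System.out.println("reduce = " + conf.get(MRJobConfig.REDUCE_JAVA_OPTS));
        System.out.println("task   = " + conf.get(JobConf.MAPRED_TASK_JAVA_OPTS));
    }
}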

Example 42 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hive by apache.

From the class TestUtilities, method testGetInputSummaryWithContentSummaryInputFormat.

@Test
public void testGetInputSummaryWithContentSummaryInputFormat() throws IOException {
    final int NUM_PARTITIONS = 5;
    final int BYTES_PER_FILE = 10;
    JobConf jobConf = new JobConf();
    Properties properties = new Properties();
    jobConf.setInt(Utilities.DEPRECATED_MAPRED_DFSCLIENT_PARALLELISM_MAX, 2);
    ContentSummaryInputFormatTestClass.setContentSummary(new ContentSummary.Builder().length(BYTES_PER_FILE).fileCount(2).directoryCount(1).build());
    /* Write more bytes to the files than the stubbed summary reports, to verify that
       ContentSummaryInputFormat is actually consulted and the size does not come from the filesystem. */
    ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS, BYTES_PER_FILE * 2, ContentSummaryInputFormatTestClass.class);
    assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength());
    assertEquals(NUM_PARTITIONS * 2, summary.getFileCount());
    assertEquals(NUM_PARTITIONS, summary.getDirectoryCount());
}
Also used: ContentSummary(org.apache.hadoop.fs.ContentSummary), Properties(java.util.Properties), JobConf(org.apache.hadoop.mapred.JobConf), Test(org.junit.Test)
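
For reference, the ContentSummary that the test installs as a stub is built with Hadoop's fluent Builder. The sketch below shows just that builder call in isolation; ContentSummaryBuilderSketch is a made-up class name, and the Hive helpers ContentSummaryInputFormatTestClass and runTestGetInputSummary are not reproduced here:

import org.apache.hadoop.fs.ContentSummary;

public class ContentSummaryBuilderSketch {
    public static void main(String[] args) {
        // Mirrors the values installed via setContentSummary() in the test above.
        ContentSummary summary = new ContentSummary.Builder()
                .length(10)         // bytes reported per partition
                .fileCount(2)       // files reported per partition
                .directoryCount(1)  // directories reported per partition
                .build();
        System.out.println(summary);
    }
}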

Example 43 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hive by apache.

From the class TestUtilities, method testGetInputPathsWithEmptyTables.

/**
   * Check that calling {@link Utilities#getInputPaths(JobConf, MapWork, Path, Context, boolean)}
   * can process two different empty tables without throwing any exceptions.
   */
@Test
public void testGetInputPathsWithEmptyTables() throws Exception {
    String alias1Name = "alias1";
    String alias2Name = "alias2";
    MapWork mapWork1 = new MapWork();
    MapWork mapWork2 = new MapWork();
    JobConf jobConf = new JobConf();
    Path nonExistentPath1 = new Path(UUID.randomUUID().toString());
    Path nonExistentPath2 = new Path(UUID.randomUUID().toString());
    PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
    TableDesc mockTableDesc = mock(TableDesc.class);
    when(mockTableDesc.isNonNative()).thenReturn(false);
    when(mockTableDesc.getProperties()).thenReturn(new Properties());
    when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
    when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
    doReturn(HiveSequenceFileOutputFormat.class).when(mockPartitionDesc).getOutputFileFormatClass();
    mapWork1.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, Lists.newArrayList(alias1Name))));
    mapWork1.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias1Name, (Operator<?>) mock(Operator.class))));
    mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
    mapWork2.setPathToAliases(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, Lists.newArrayList(alias2Name))));
    mapWork2.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(ImmutableMap.of(alias2Name, (Operator<?>) mock(Operator.class))));
    mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
    List<Path> inputPaths = new ArrayList<>();
    try {
        Path scratchDir = new Path(HiveConf.getVar(jobConf, HiveConf.ConfVars.LOCALSCRATCHDIR));
        inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork1, scratchDir, mock(Context.class), false));
        inputPaths.addAll(Utilities.getInputPaths(jobConf, mapWork2, scratchDir, mock(Context.class), false));
        assertEquals(2, inputPaths.size());
    } finally {
        File file;
        for (Path path : inputPaths) {
            file = new File(path.toString());
            if (file.exists()) {
                file.delete();
            }
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path), ArrayList(java.util.ArrayList), Properties(java.util.Properties), MapWork(org.apache.hadoop.hive.ql.plan.MapWork), PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), JobConf(org.apache.hadoop.mapred.JobConf), OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc), File(java.io.File), Test(org.junit.Test)
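
The partition and table descriptors in this test are Mockito stubs. The following compact sketch shows the same when/thenReturn stubbing style against a hypothetical Descriptor interface (invented here purely for illustration; it is not a Hive class):

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class MockitoStubbingSketch {
    // Hypothetical collaborator, standing in for TableDesc/PartitionDesc.
    interface Descriptor {
        boolean isNonNative();
        String alias();
    }

    public static void main(String[] args) {
        Descriptor desc = mock(Descriptor.class);
        when(desc.isNonNative()).thenReturn(false);  // stub a boolean getter
        when(desc.alias()).thenReturn("alias1");     // stub a string getter
        System.out.println(desc.alias() + " nonNative=" + desc.isNonNative());
    }
}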

Example 44 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hive by apache.

From the class TestUtilities, method testGetInputSummaryWithMultipleThreads.

@Test
public void testGetInputSummaryWithMultipleThreads() throws IOException {
    final int NUM_PARTITIONS = 5;
    final int BYTES_PER_FILE = 5;
    JobConf jobConf = new JobConf();
    Properties properties = new Properties();
    jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 2);
    ContentSummary summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS, BYTES_PER_FILE, HiveInputFormat.class);
    assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength());
    assertEquals(NUM_PARTITIONS, summary.getFileCount());
    assertEquals(NUM_PARTITIONS, summary.getDirectoryCount());
    // Test deprecated mapred.dfsclient.parallelism.max
    jobConf.setInt(HiveConf.ConfVars.HIVE_EXEC_INPUT_LISTING_MAX_THREADS.varname, 0);
    jobConf.setInt(Utilities.DEPRECATED_MAPRED_DFSCLIENT_PARALLELISM_MAX, 2);
    summary = runTestGetInputSummary(jobConf, properties, NUM_PARTITIONS, BYTES_PER_FILE, HiveInputFormat.class);
    assertEquals(NUM_PARTITIONS * BYTES_PER_FILE, summary.getLength());
    assertEquals(NUM_PARTITIONS, summary.getFileCount());
    assertEquals(NUM_PARTITIONS, summary.getDirectoryCount());
}
Also used: ContentSummary(org.apache.hadoop.fs.ContentSummary), Properties(java.util.Properties), JobConf(org.apache.hadoop.mapred.JobConf), Test(org.junit.Test)
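
The second half of this test checks that the deprecated mapred.dfsclient.parallelism.max property is still honored when the Hive listing-threads setting is 0. The sketch below only illustrates that primary-key-plus-deprecated-fallback idea; the literal key strings are assumed to match the constants used in the test, and the resolution logic is a simplified guess rather than Hive's actual implementation in Utilities:

import org.apache.hadoop.mapred.JobConf;

public class ListingThreadsFallbackSketch {
    // Assumed key strings, standing in for the HiveConf/Utilities constants used in the test.
    static final String HIVE_MAX_THREADS = "hive.exec.input.listing.max.threads";
    static final String DEPRECATED_PARALLELISM = "mapred.dfsclient.parallelism.max";

    // Simplified, assumed resolution: prefer the Hive setting, fall back when it is 0.
    static int resolveListingThreads(JobConf conf) {
        int threads = conf.getInt(HIVE_MAX_THREADS, 0);
        return threads > 0 ? threads : conf.getInt(DEPRECATED_PARALLELISM, 1);
    }

    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setInt(HIVE_MAX_THREADS, 0);
        conf.setInt(DEPRECATED_PARALLELISM, 2);
        System.out.println("threads = " + resolveListingThreads(conf)); // prints 2
    }
}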

Example 45 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hive by apache.

From the class GenericUDTFGetSplits, method getSplits.

public InputSplit[] getSplits(JobConf job, int numSplits, TezWork work, Schema schema) throws IOException {
    DAG dag = DAG.create(work.getName());
    dag.setCredentials(job.getCredentials());
    DagUtils utils = DagUtils.getInstance();
    Context ctx = new Context(job);
    MapWork mapWork = (MapWork) work.getAllWork().get(0);
    // A number of things get set up in the Context based on the conf, but we only need
    // the MR tmp directory for the following call.
    JobConf wxConf = utils.initializeVertexConf(job, ctx, mapWork);
    // TODO: should we also whitelist input formats here? from mapred.input.format.class
    Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), job);
    FileSystem fs = scratchDir.getFileSystem(job);
    try {
        LocalResource appJarLr = createJarLocalResource(utils.getExecJarPathLocal(), utils, job);
        Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, appJarLr, new ArrayList<LocalResource>(), fs, ctx, false, work, work.getVertexType(mapWork));
        String vertexName = wx.getName();
        dag.addVertex(wx);
        utils.addCredentials(mapWork, dag);
        // We have the DAG; now proceed to generate the splits.
        Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
        Preconditions.checkState(HiveConf.getBoolVar(wxConf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
        HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork);
        List<Event> eventList = splitGenerator.initialize();
        InputSplit[] result = new InputSplit[eventList.size() - 1];
        InputConfigureVertexTasksEvent configureEvent = (InputConfigureVertexTasksEvent) eventList.get(0);
        List<TaskLocationHint> hints = configureEvent.getLocationHint().getTaskLocationHints();
        Preconditions.checkState(hints.size() == eventList.size() - 1);
        if (LOG.isDebugEnabled()) {
            LOG.debug("NumEvents=" + eventList.size() + ", NumSplits=" + result.length);
        }
        LlapCoordinator coordinator = LlapCoordinator.getInstance();
        if (coordinator == null) {
            throw new IOException("LLAP coordinator is not initialized; must be running in HS2 with " + ConfVars.LLAP_HS2_ENABLE_COORDINATOR.varname + " enabled");
        }
        // See the discussion in the implementation as to why we generate app ID.
        ApplicationId applicationId = coordinator.createExtClientAppId();
        // This assumes LLAP cluster owner is always the HS2 user.
        String llapUser = UserGroupInformation.getLoginUser().getShortUserName();
        String queryUser = null;
        byte[] tokenBytes = null;
        LlapSigner signer = null;
        if (UserGroupInformation.isSecurityEnabled()) {
            signer = coordinator.getLlapSigner(job);
            // 1. Generate the token for query user (applies to all splits).
            queryUser = SessionState.getUserFromAuthenticator();
            if (queryUser == null) {
                queryUser = UserGroupInformation.getCurrentUser().getUserName();
                LOG.warn("Cannot determine the session user; using " + queryUser + " instead");
            }
            LlapTokenLocalClient tokenClient = coordinator.getLocalTokenClient(job, llapUser);
            // We put the query user, not LLAP user, into the message and token.
            Token<LlapTokenIdentifier> token = tokenClient.createToken(applicationId.toString(), queryUser, true);
            LOG.info("Created the token for remote user: {}", token);
            bos.reset();
            token.write(dos);
            tokenBytes = bos.toByteArray();
        } else {
            queryUser = UserGroupInformation.getCurrentUser().getUserName();
        }
        LOG.info("Number of splits: " + (eventList.size() - 1));
        SignedMessage signedSvs = null;
        for (int i = 0; i < eventList.size() - 1; i++) {
            TaskSpec taskSpec = new TaskSpecBuilder().constructTaskSpec(dag, vertexName, eventList.size() - 1, applicationId, i);
            // 2. Generate the vertex/submit information for all events.
            if (i == 0) {
                // The queryId could either be picked up from the current request being processed, or
                // generated. The current request isn't exactly correct since the query is 'done' once we
                // return the results. Generating a new one has the added benefit of working once this
                // is moved out of a UDTF into a proper API.
                // Setting this to the generated AppId which is unique.
                // Despite the differences in TaskSpec, the vertex spec should be the same.
                signedSvs = createSignedVertexSpec(signer, taskSpec, applicationId, queryUser, applicationId.toString());
            }
            SubmitWorkInfo submitWorkInfo = new SubmitWorkInfo(applicationId, System.currentTimeMillis(), taskSpec.getVertexParallelism(), signedSvs.message, signedSvs.signature);
            byte[] submitWorkBytes = SubmitWorkInfo.toBytes(submitWorkInfo);
            // 3. Generate input event.
            SignedMessage eventBytes = makeEventBytes(wx, vertexName, eventList.get(i + 1), signer);
            // 4. Make location hints.
            SplitLocationInfo[] locations = makeLocationHints(hints.get(i));
            result[i] = new LlapInputSplit(i, submitWorkBytes, eventBytes.message, eventBytes.signature, locations, schema, llapUser, tokenBytes);
        }
        return result;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used: Vertex(org.apache.tez.dag.api.Vertex), SubmitWorkInfo(org.apache.hadoop.hive.llap.SubmitWorkInfo), LlapTokenIdentifier(org.apache.hadoop.hive.llap.security.LlapTokenIdentifier), SplitLocationInfo(org.apache.hadoop.mapred.SplitLocationInfo), HiveSplitGenerator(org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator), TaskSpecBuilder(org.apache.tez.dag.api.TaskSpecBuilder), LlapSigner(org.apache.hadoop.hive.llap.security.LlapSigner), TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint), LlapTokenLocalClient(org.apache.hadoop.hive.llap.security.LlapTokenLocalClient), DagUtils(org.apache.hadoop.hive.ql.exec.tez.DagUtils), LlapInputSplit(org.apache.hadoop.hive.llap.LlapInputSplit), FileSystem(org.apache.hadoop.fs.FileSystem), JobConf(org.apache.hadoop.mapred.JobConf), InputSplit(org.apache.hadoop.mapred.InputSplit), Context(org.apache.hadoop.hive.ql.Context), Path(org.apache.hadoop.fs.Path), TaskSpec(org.apache.tez.runtime.api.impl.TaskSpec), SignedMessage(org.apache.hadoop.hive.llap.security.LlapSigner.SignedMessage), DAG(org.apache.tez.dag.api.DAG), IOException(java.io.IOException), LlapCoordinator(org.apache.hadoop.hive.llap.coordinator.LlapCoordinator), LoginException(javax.security.auth.login.LoginException), URISyntaxException(java.net.URISyntaxException), UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException), FileNotFoundException(java.io.FileNotFoundException), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException), UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException), CommandNeedRetryException(org.apache.hadoop.hive.ql.CommandNeedRetryException), LocalResource(org.apache.hadoop.yarn.api.records.LocalResource), MapWork(org.apache.hadoop.hive.ql.plan.MapWork), Event(org.apache.tez.runtime.api.Event), InputConfigureVertexTasksEvent(org.apache.tez.runtime.api.events.InputConfigureVertexTasksEvent), InputDataInformationEvent(org.apache.tez.runtime.api.events.InputDataInformationEvent), ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)
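
Callers of a getSplits()-style method such as the one above usually rely only on the generic InputSplit contract. A short sketch of inspecting the returned array (SplitInspector is a made-up helper; this works for any org.apache.hadoop.mapred.InputSplit implementation and omits error handling):

import java.io.IOException;
import org.apache.hadoop.mapred.InputSplit;

public class SplitInspector {
    static void describe(InputSplit[] splits) throws IOException {
        for (int i = 0; i < splits.length; i++) {
            // Every mapred InputSplit reports a byte length and preferred host locations.
            System.out.println("split " + i
                    + " length=" + splits[i].getLength()
                    + " locations=" + String.join(",", splits[i].getLocations()));
        }
    }
}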

Aggregations

JobConf (org.apache.hadoop.mapred.JobConf): 1037
Path (org.apache.hadoop.fs.Path): 510
Test (org.junit.Test): 317
FileSystem (org.apache.hadoop.fs.FileSystem): 264
IOException (java.io.IOException): 204
Configuration (org.apache.hadoop.conf.Configuration): 163
InputSplit (org.apache.hadoop.mapred.InputSplit): 110
ArrayList (java.util.ArrayList): 89
Text (org.apache.hadoop.io.Text): 82
File (java.io.File): 81
RunningJob (org.apache.hadoop.mapred.RunningJob): 67
Properties (java.util.Properties): 58
List (java.util.List): 49
HashMap (java.util.HashMap): 47
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 47
SequenceFile (org.apache.hadoop.io.SequenceFile): 45
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 44
Map (java.util.Map): 42
Job (org.apache.hadoop.mapreduce.Job): 42
LongWritable (org.apache.hadoop.io.LongWritable): 41
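
As the counts suggest, JobConf almost always appears alongside Path, FileSystem, and Configuration. A minimal, self-contained sketch of that recurring pattern (JobConfBasics and the input path are hypothetical; no cluster is needed for local defaults):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class JobConfBasics {
    public static void main(String[] args) {
        Configuration base = new Configuration();  // plain Hadoop configuration
        JobConf jobConf = new JobConf(base);       // JobConf adds MapReduce-specific accessors
        jobConf.setJobName("jobconf-basics");
        jobConf.setNumReduceTasks(1);
        FileInputFormat.setInputPaths(jobConf, new Path("/tmp/jobconf-basics-input")); // hypothetical path
        System.out.println(jobConf.getJobName() + ", reducers=" + jobConf.getNumReduceTasks());
    }
}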