Example 11 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

From class TestMultipleOutputs, method _testMultipleOutputs.

protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    Path inDir = getDir(IN_DIR);
    Path outDir = getDir(OUT_DIR);
    JobConf conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    file = fs.create(new Path(inDir, "part-1"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    conf.setJobName("mo");
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, "sequence", SequenceFileOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(conf, withCounters);
    conf.setMapperClass(MOMap.class);
    conf.setReducerClass(MOReduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    // assert number of named output part files
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.listStatus(outDir);
    for (FileStatus status : statuses) {
        String name = status.getPath().getName();
        if (name.equals("text-m-00000") || name.equals("text-m-00001")
                || name.equals("text-r-00000")
                || name.equals("sequence_A-m-00000") || name.equals("sequence_A-m-00001")
                || name.equals("sequence_B-m-00000") || name.equals("sequence_B-m-00001")
                || name.equals("sequence_B-r-00000") || name.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        }
    }
    assertEquals(9, namedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith("text"));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(conf), "sequence_B-r-00000"), conf);
    assertEquals(LongWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    LongWritable key = new LongWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals("sequence", value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);
    Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
    if (!withCounters) {
        assertEquals(0, counters.size());
    } else {
        assertEquals(4, counters.size());
        assertEquals(4, counters.getCounter("text"));
        assertEquals(2, counters.getCounter("sequence_A"));
        assertEquals(4, counters.getCounter("sequence_B"));
        assertEquals(2, counters.getCounter("sequence_C"));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), DataOutputStream (java.io.DataOutputStream), InputStreamReader (java.io.InputStreamReader), BufferedReader (java.io.BufferedReader), Text (org.apache.hadoop.io.Text), SequenceFile (org.apache.hadoop.io.SequenceFile), LongWritable (org.apache.hadoop.io.LongWritable), FileSystem (org.apache.hadoop.fs.FileSystem), JobClient (org.apache.hadoop.mapred.JobClient), RunningJob (org.apache.hadoop.mapred.RunningJob), Counters (org.apache.hadoop.mapred.Counters), JobConf (org.apache.hadoop.mapred.JobConf)
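
The test above polls the submitted job with a sleep loop. JobClient also supports blocking until completion; here is a minimal sketch (assuming a fully configured JobConf, as in the test) that submits a job and waits via RunningJob.waitForCompletion():

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitAndWait {
    public static void run(JobConf conf) throws IOException {
        JobClient jc = new JobClient(conf);
        RunningJob job = jc.submitJob(conf);
        // Blocks until the job finishes, replacing the poll-and-sleep loop.
        job.waitForCompletion();
        if (!job.isSuccessful()) {
            throw new IOException("Job " + job.getID() + " failed");
        }
    }
}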

Example 12 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hive by apache.

From class Driver, method getClusterStatus.

/**
   * Return the status information about the Map-Reduce cluster
   */
public ClusterStatus getClusterStatus() throws Exception {
    ClusterStatus cs;
    try {
        JobConf job = new JobConf(conf);
        JobClient jc = new JobClient(job);
        cs = jc.getClusterStatus();
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    }
    LOG.info("Returning cluster status: " + cs.toString());
    return cs;
}
Also used: JobConf (org.apache.hadoop.mapred.JobConf), JobClient (org.apache.hadoop.mapred.JobClient), ClusterStatus (org.apache.hadoop.mapred.ClusterStatus), LockException (org.apache.hadoop.hive.ql.lockmgr.LockException), IOException (java.io.IOException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), AuthorizationException (org.apache.hadoop.hive.ql.metadata.AuthorizationException)
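
A ClusterStatus carries more than its toString(); a minimal sketch (assuming a reachable cluster) of reading the capacity and load figures it exposes:

import java.io.IOException;

import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class ClusterStatusProbe {
    public static void print(JobConf job) throws IOException {
        JobClient jc = new JobClient(job);
        ClusterStatus cs = jc.getClusterStatus();
        // Capacity and load figures exposed by the old mapred API.
        System.out.println("task trackers:   " + cs.getTaskTrackers());
        System.out.println("running maps:    " + cs.getMapTasks() + " / " + cs.getMaxMapTasks());
        System.out.println("running reduces: " + cs.getReduceTasks() + " / " + cs.getMaxReduceTasks());
    }
}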

Example 13 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hive by apache.

From class HCatUtil, method getJobTrackerDelegationToken.

public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> getJobTrackerDelegationToken(Configuration conf, String userName) throws Exception {
    // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")");
    JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class));
    Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t = jcl.getDelegationToken(new Text(userName));
    // LOG.info("got "+t);
    return t;
// return null;
}
Also used: DelegationTokenIdentifier (org.apache.hadoop.hive.thrift.DelegationTokenIdentifier), AbstractDelegationTokenIdentifier (org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier), HCatOutputFormat (org.apache.hive.hcatalog.mapreduce.HCatOutputFormat), Text (org.apache.hadoop.io.Text), JobClient (org.apache.hadoop.mapred.JobClient), JobConf (org.apache.hadoop.mapred.JobConf)
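
A delegation token is only useful once it is attached to the submitting user's credentials. A minimal sketch, assuming a security-enabled cluster and that the HCatUtil class above is on the classpath (its package is assumed to be org.apache.hive.hcatalog.common):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
// Assumed package for the HCatUtil shown above.
import org.apache.hive.hcatalog.common.HCatUtil;

public class TokenAttach {
    public static void attach(Configuration conf, String userName) throws Exception {
        Token<?> t = HCatUtil.getJobTrackerDelegationToken(conf, userName);
        // Tokens stored on the current UGI are picked up automatically at job submission.
        UserGroupInformation.getCurrentUser().addToken(t);
    }
}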

Example 14 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project whirr by apache.

From class HadoopServiceTest, method test.

@Test
public void test() throws Exception {
    Configuration conf = getConfiguration();
    JobConf job = new JobConf(conf, HadoopServiceTest.class);
    JobClient client = new JobClient(job);
    waitForTaskTrackers(client);
    FileSystem fs = FileSystem.get(conf);
    OutputStream os = fs.create(new Path("input"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();
    job.setMapperClass(TokenCountMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, new Path("output"));
    JobClient.runJob(job);
    FSDataInputStream in = fs.open(new Path("output/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("a\t1", reader.readLine());
    assertEquals("b\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}
Also used: Path (org.apache.hadoop.fs.Path), CompositeConfiguration (org.apache.commons.configuration.CompositeConfiguration), Configuration (org.apache.hadoop.conf.Configuration), PropertiesConfiguration (org.apache.commons.configuration.PropertiesConfiguration), InputStreamReader (java.io.InputStreamReader), FileSystem (org.apache.hadoop.fs.FileSystem), OutputStream (java.io.OutputStream), BufferedReader (java.io.BufferedReader), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), OutputStreamWriter (java.io.OutputStreamWriter), Writer (java.io.Writer), JobConf (org.apache.hadoop.mapred.JobConf), JobClient (org.apache.hadoop.mapred.JobClient), Test (org.junit.Test)
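
The waitForTaskTrackers helper is referenced but not shown in the snippet. A hedged sketch of what such a helper might look like, polling JobClient until at least one task tracker has registered (the name and the one-second interval are assumptions):

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;

public class TrackerWait {
    // Hypothetical reconstruction of the helper used in the test above.
    static void waitForTaskTrackers(JobClient client) throws IOException, InterruptedException {
        while (client.getClusterStatus().getTaskTrackers() == 0) {
            Thread.sleep(1000); // assumed polling interval
        }
    }
}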

Example 15 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project h2o-2 by h2oai.

From class gen_flatfile, method run.

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String trackerIpPort = conf.get("mapred.job.tracker");
    // System.err.println("mapred.job.tracker: " + trackerIpPort);
    String[] arr = trackerIpPort.split(":");
    String host = arr[0];
    int port = Integer.parseInt(arr[1]);
    // System.err.println("host: " + host);
    // System.err.println("port: " + port);
    InetSocketAddress addr = new InetSocketAddress(host, port);
    JobClient client = new JobClient(addr, conf);
    Collection<String> names = client.getClusterStatus(true).getActiveTrackerNames();
    for (String name : names) {
        String n = name.substring("tracker_".length(), name.indexOf(':'));
        String s = InetAddress.getByName(n).getHostAddress();
        System.out.println(s);
    }
    return 0;
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), InetSocketAddress (java.net.InetSocketAddress), JobClient (org.apache.hadoop.mapred.JobClient)
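
Since gen_flatfile overrides Tool.run and reads its settings via getConf(), it is normally launched through ToolRunner, which parses generic Hadoop options (-D, -conf, -fs, ...) before delegating. A minimal sketch of such an entry point (the Main wrapper class is an assumption; the original driver is not shown):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class Main {
    public static void main(String[] args) throws Exception {
        // ToolRunner injects the parsed Configuration via Configurable.setConf.
        int rc = ToolRunner.run(new Configuration(), new gen_flatfile(), args);
        System.exit(rc);
    }
}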

Aggregations

JobClient (org.apache.hadoop.mapred.JobClient): 47 usages
Path (org.apache.hadoop.fs.Path): 25 usages
RunningJob (org.apache.hadoop.mapred.RunningJob): 20 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 18 usages
JobConf (org.apache.hadoop.mapred.JobConf): 18 usages
IOException (java.io.IOException): 16 usages
Configuration (org.apache.hadoop.conf.Configuration): 16 usages
ClusterStatus (org.apache.hadoop.mapred.ClusterStatus): 11 usages
Date (java.util.Date): 7 usages
Text (org.apache.hadoop.io.Text): 6 usages
Counters (org.apache.hadoop.mapred.Counters): 6 usages
Test (org.junit.Test): 6 usages
DataOutputStream (java.io.DataOutputStream): 5 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 5 usages
BufferedReader (java.io.BufferedReader): 4 usages
InputStreamReader (java.io.InputStreamReader): 4 usages
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 4 usages
Context (org.apache.hadoop.hive.ql.Context): 4 usages
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 4 usages
FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat): 4 usages