
Example 61 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestNetworkedJob method testNetworkedJob.

/**
 * Tests the NetworkedJob and JobClient APIs.
 * @throws Exception
 */
@SuppressWarnings("deprecation")
@Test(timeout = 500000)
public void testNetworkedJob() throws Exception {
    // mock creation
    MiniMRClientCluster mr = null;
    FileSystem fileSys = null;
    try {
        mr = createMiniClusterWithCapacityScheduler();
        JobConf job = new JobConf(mr.getConfig());
        fileSys = FileSystem.get(job);
        fileSys.delete(testDir, true);
        FSDataOutputStream out = fileSys.create(inFile, true);
        out.writeBytes("This is a test file");
        out.close();
        FileInputFormat.setInputPaths(job, inFile);
        FileOutputFormat.setOutputPath(job, outDir);
        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setNumReduceTasks(0);
        JobClient client = new JobClient(mr.getConfig());
        RunningJob rj = client.submitJob(job);
        JobID jobId = rj.getID();
        NetworkedJob runningJob = (NetworkedJob) client.getJob(jobId);
        runningJob.setJobPriority(JobPriority.HIGH.name());
        // test getters
        assertTrue(runningJob.getConfiguration().toString().endsWith("0001/job.xml"));
        assertEquals(jobId, runningJob.getID());
        assertEquals(jobId.toString(), runningJob.getJobID());
        assertEquals("N/A", runningJob.getJobName());
        assertTrue(runningJob.getJobFile().endsWith(".staging/" + runningJob.getJobID() + "/job.xml"));
        assertTrue(runningJob.getTrackingURL().length() > 0);
        assertTrue(runningJob.mapProgress() == 0.0f);
        assertTrue(runningJob.reduceProgress() == 0.0f);
        assertTrue(runningJob.cleanupProgress() == 0.0f);
        assertTrue(runningJob.setupProgress() == 0.0f);
        TaskCompletionEvent[] tce = runningJob.getTaskCompletionEvents(0);
        assertEquals(0, tce.length);
        assertEquals("", runningJob.getHistoryUrl());
        assertFalse(runningJob.isRetired());
        assertEquals("", runningJob.getFailureInfo());
        assertEquals("N/A", runningJob.getJobStatus().getJobName());
        assertEquals(0, client.getMapTaskReports(jobId).length);
        try {
            client.getSetupTaskReports(jobId);
        } catch (YarnRuntimeException e) {
            assertEquals("Unrecognized task type: JOB_SETUP", e.getMessage());
        }
        try {
            client.getCleanupTaskReports(jobId);
        } catch (YarnRuntimeException e) {
            assertEquals("Unrecognized task type: JOB_CLEANUP", e.getMessage());
        }
        assertEquals(0, client.getReduceTaskReports(jobId).length);
        // test ClusterStatus
        ClusterStatus status = client.getClusterStatus(true);
        assertEquals(2, status.getActiveTrackerNames().size());
        // these getters are not implemented and always return zero or an empty collection
        assertEquals(0, status.getBlacklistedTrackers());
        assertEquals(0, status.getBlacklistedTrackerNames().size());
        assertEquals(0, status.getBlackListedTrackersInfo().size());
        assertEquals(JobTrackerStatus.RUNNING, status.getJobTrackerStatus());
        assertEquals(1, status.getMapTasks());
        assertEquals(20, status.getMaxMapTasks());
        assertEquals(4, status.getMaxReduceTasks());
        assertEquals(0, status.getNumExcludedNodes());
        assertEquals(1, status.getReduceTasks());
        assertEquals(2, status.getTaskTrackers());
        assertEquals(0, status.getTTExpiryInterval());
        assertEquals(JobTrackerStatus.RUNNING, status.getJobTrackerStatus());
        assertEquals(0, status.getGraylistedTrackers());
        // test read and write
        ByteArrayOutputStream dataOut = new ByteArrayOutputStream();
        status.write(new DataOutputStream(dataOut));
        ClusterStatus status2 = new ClusterStatus();
        status2.readFields(new DataInputStream(new ByteArrayInputStream(dataOut.toByteArray())));
        assertEquals(status.getActiveTrackerNames(), status2.getActiveTrackerNames());
        assertEquals(status.getBlackListedTrackersInfo(), status2.getBlackListedTrackersInfo());
        assertEquals(status.getMapTasks(), status2.getMapTasks());
        // test taskStatusfilter
        JobClient.setTaskOutputFilter(job, TaskStatusFilter.ALL);
        assertEquals(TaskStatusFilter.ALL, JobClient.getTaskOutputFilter(job));
        // runningJob.setJobPriority(JobPriority.HIGH.name());
        // test default map
        assertEquals(20, client.getDefaultMaps());
        assertEquals(4, client.getDefaultReduces());
        assertEquals("jobSubmitDir", client.getSystemDir().getName());
        // test queue information
        JobQueueInfo[] rootQueueInfo = client.getRootQueues();
        assertEquals(1, rootQueueInfo.length);
        assertEquals("default", rootQueueInfo[0].getQueueName());
        JobQueueInfo[] qinfo = client.getQueues();
        assertEquals(1, qinfo.length);
        assertEquals("default", qinfo[0].getQueueName());
        assertEquals(0, client.getChildQueues("default").length);
        assertEquals(1, client.getJobsFromQueue("default").length);
        assertTrue(client.getJobsFromQueue("default")[0].getJobFile().endsWith("/job.xml"));
        JobQueueInfo qi = client.getQueueInfo("default");
        assertEquals("default", qi.getQueueName());
        assertEquals("running", qi.getQueueState());
        QueueAclsInfo[] aai = client.getQueueAclsForCurrentUser();
        assertEquals(2, aai.length);
        assertEquals("root", aai[0].getQueueName());
        assertEquals("default", aai[1].getQueueName());
        // test JobClient
        // The following asserts read JobStatus twice and ensure the returned
        // JobStatus objects correspond to the same Job.
        assertEquals("Expected matching JobIDs", jobId, client.getJob(jobId).getJobStatus().getJobID());
        assertEquals("Expected matching startTimes", rj.getJobStatus().getStartTime(), client.getJob(jobId).getJobStatus().getStartTime());
    } finally {
        if (fileSys != null) {
            fileSys.delete(testDir, true);
        }
        if (mr != null) {
            mr.stop();
        }
    }
}
Also used : FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ByteArrayInputStream(java.io.ByteArrayInputStream) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) NetworkedJob(org.apache.hadoop.mapred.JobClient.NetworkedJob) Test(org.junit.Test)
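
The write/readFields round-trip on ClusterStatus above is Hadoop's generic Writable serialization pattern. A minimal, self-contained sketch of the same idiom, using org.apache.hadoop.io.Text as a stand-in Writable (the class name WritableRoundTrip is illustrative, not from the test):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.Text;

public class WritableRoundTrip {
    public static void main(String[] args) throws IOException {
        Text original = new Text("hello");
        // Serialize: Writable.write(DataOutput) targets an in-memory buffer.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));
        // Deserialize: readFields(DataInput) repopulates a fresh instance.
        Text copy = new Text();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        // true: the copy is byte-for-byte equal to the original
        System.out.println(original.equals(copy));
    }
}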

Example 62 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestSpecialCharactersInOutputPath method launchJob.

public static boolean launchJob(URI fileSys, JobConf conf, int numMaps, int numReduces) throws IOException {
    final Path inDir = new Path("/testing/input");
    final Path outDir = new Path("/testing/output");
    FileSystem fs = FileSystem.get(fileSys, conf);
    fs.delete(outDir, true);
    if (!fs.mkdirs(inDir)) {
        LOG.warn("Can't create " + inDir);
        return false;
    }
    // generate an input file
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("foo foo2 foo3");
    file.close();
    // use WordCount example
    FileSystem.setDefaultUri(conf, fileSys);
    conf.setJobName("foo");
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SpecialTextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setNumMapTasks(numMaps);
    conf.setNumReduceTasks(numReduces);
    // run job and wait for completion
    RunningJob runningJob = JobClient.runJob(conf);
    try {
        assertTrue(runningJob.isComplete());
        assertTrue(runningJob.isSuccessful());
        assertTrue("Output folder not found!", fs.exists(new Path("/testing/output/" + OUTPUT_FILENAME)));
    } catch (NullPointerException npe) {
        // This NPE should no longer happen
        fail("A NPE should not have happened.");
    }
    // return job result
    LOG.info("job is complete: " + runningJob.isSuccessful());
    return (runningJob.isSuccessful());
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem)
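
launchJob assigns the result of fs.create(...) directly to a DataOutputStream, which works because org.apache.hadoop.fs.FSDataOutputStream extends java.io.DataOutputStream. A minimal sketch of that input-file setup, assuming a default local Configuration (the path below is illustrative):

import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateInputFile {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // fs.create returns an FSDataOutputStream, a DataOutputStream subclass.
        DataOutputStream out = fs.create(new Path("/tmp/demo/in/part-0"));
        try {
            // writeBytes emits one byte per char (the low-order byte).
            out.writeBytes("foo foo2 foo3");
        } finally {
            out.close();
        }
    }
}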

Example 63 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestReporter method testStatusLimit.

@Test
public void testStatusLimit() throws IOException, InterruptedException, ClassNotFoundException {
    Path test = new Path(testRootTempDir, "testStatusLimit");
    Configuration conf = new Configuration();
    Path inDir = new Path(test, "in");
    Path outDir = new Path(test, "out");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(inDir)) {
        fs.delete(inDir, true);
    }
    fs.mkdirs(inDir);
    DataOutputStream file = fs.create(new Path(inDir, "part-" + 0));
    file.writeBytes("testStatusLimit");
    file.close();
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    Job job = Job.getInstance(conf, "testStatusLimit");
    job.setMapperClass(StatusLimitMapper.class);
    job.setNumReduceTasks(0);
    FileInputFormat.addInputPath(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.waitForCompletion(true);
    assertTrue("Job failed", job.isSuccessful());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
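
Both filesystem examples above seed test input with DataOutputStream.writeBytes(String), which writes only the low-order byte of each char; for the ASCII strings used here that is lossless. A small sketch contrasting it with writeUTF (the byte counts in the comments follow the java.io.DataOutputStream contract):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class WriteBytesDemo {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buf);
        out.writeBytes("testStatusLimit"); // 15 chars -> 15 raw bytes, high bytes dropped
        out.writeUTF("testStatusLimit");   // 2-byte length prefix + 15 modified-UTF-8 bytes
        out.close();
        System.out.println(buf.size());    // 32
    }
}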

Example 64 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestTupleWritable method testWideWritable2.

@Test
public void testWideWritable2() throws Exception {
    Writable[] manyWrits = makeRandomWritables(71);
    TupleWritable sTuple = new TupleWritable(manyWrits);
    for (int i = 0; i < manyWrits.length; i++) {
        sTuple.setWritten(i);
    }
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    sTuple.write(new DataOutputStream(out));
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    TupleWritable dTuple = new TupleWritable();
    dTuple.readFields(new DataInputStream(in));
    assertTrue("Failed to write/read tuple", sTuple.equals(dTuple));
    assertEquals("All tuple data has not been read from the stream", -1, in.read());
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) DataOutputStream(java.io.DataOutputStream) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) IntWritable(org.apache.hadoop.io.IntWritable) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) Test(org.junit.Test)

Example 65 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestTupleWritable method testPreVersion21CompatibilityEmptyTuple.

@Test
public void testPreVersion21CompatibilityEmptyTuple() throws Exception {
    Writable[] manyWrits = new Writable[0];
    PreVersion21TupleWritable oldTuple = new PreVersion21TupleWritable(manyWrits);
    // don't set any values written
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    oldTuple.write(new DataOutputStream(out));
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    TupleWritable dTuple = new TupleWritable();
    dTuple.readFields(new DataInputStream(in));
    assertTrue("Tuple writable is unable to read pre-0.21 versions of TupleWritable", oldTuple.isCompatible(dTuple));
    assertEquals("All tuple data has not been read from the stream", -1, in.read());
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) DataOutputStream(java.io.DataOutputStream) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) IntWritable(org.apache.hadoop.io.IntWritable) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) Test(org.junit.Test)
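
Both TupleWritable tests end with assertEquals(..., -1, in.read()) to verify that readFields consumed exactly the bytes write produced. A compact version of that round-trip-plus-exhaustion idiom as a reusable helper (the class and method names are illustrative, not from the tests):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

final class RoundTrip {
    // Serializes src, deserializes into dst, and reports whether the
    // read side consumed exactly the bytes the write side produced.
    static boolean exact(Writable src, Writable dst) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        src.write(new DataOutputStream(out));
        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
        dst.readFields(new DataInputStream(in));
        return in.read() == -1; // -1 means the stream is exhausted
    }
}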

Aggregations

DataOutputStream (java.io.DataOutputStream): 2968
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1314
IOException (java.io.IOException): 1024
Test (org.junit.Test): 633
DataInputStream (java.io.DataInputStream): 615
FileOutputStream (java.io.FileOutputStream): 427
ByteArrayInputStream (java.io.ByteArrayInputStream): 411
File (java.io.File): 281
BufferedOutputStream (java.io.BufferedOutputStream): 228
UnitTest (org.apache.geode.test.junit.categories.UnitTest): 172
URL (java.net.URL): 149
InputStreamReader (java.io.InputStreamReader): 146
BufferedReader (java.io.BufferedReader): 142
Path (org.apache.hadoop.fs.Path): 137
DataInput (java.io.DataInput): 124
ArrayList (java.util.ArrayList): 122
HttpURLConnection (java.net.HttpURLConnection): 120
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 117
FileInputStream (java.io.FileInputStream): 107
InputStream (java.io.InputStream): 107