Use of java.io.FileReader in project hadoop by apache.
From the class TestNewCombinerGrouping, method testCombiner.
@Test
public void testCombiner() throws Exception {
  if (!new File(TEST_ROOT_DIR).mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
  }
  File in = new File(TEST_ROOT_DIR, "input");
  if (!in.mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + in);
  }
  File out = new File(TEST_ROOT_DIR, "output");
  // Write five input records; the part of the key before '|' is the
  // grouping prefix, the part after ',' is the value to aggregate.
  PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
  pw.println("A|a,1");
  pw.println("A|b,2");
  pw.println("B|a,3");
  pw.println("B|b,4");
  pw.println("B|c,5");
  pw.close();
  JobConf conf = new JobConf();
  conf.set("mapreduce.framework.name", "local");
  Job job = new Job(conf);
  TextInputFormat.setInputPaths(job, new Path(in.getPath()));
  TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setGroupingComparatorClass(GroupComparator.class);
  // The feature under test: use the grouping comparator for the combiner too.
  job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
  job.setCombinerClass(Combiner.class);
  // Force the combiner to run even when there is only a single spill.
  job.getConfiguration().setInt("min.num.spills.for.combine", 0);
  job.submit();
  job.waitForCompletion(false);
  if (job.isSuccessful()) {
    Counters counters = job.getCounters();
    long combinerInputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_INPUT_RECORDS").getValue();
    long combinerOutputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_OUTPUT_RECORDS").getValue();
    // The combiner must have run and must have reduced the record count.
    Assert.assertTrue(combinerInputRecords > 0);
    Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
    BufferedReader br = new BufferedReader(
        new FileReader(new File(out, "part-r-00000")));
    Set<String> output = new HashSet<String>();
    // Output lines have the form "X|x<TAB>N"; keep the group letter
    // (index 0) and the aggregated value (index 4) from each line.
    String line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line.substring(0, 1) + line.substring(4, 5));
    line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line.substring(0, 1) + line.substring(4, 5));
    // Exactly two output lines are expected, one per group.
    line = br.readLine();
    Assert.assertNull(line);
    br.close();
    // Each group collapses to a single record: "A" with value 2,
    // "B" with value 5.
    Set<String> expected = new HashSet<String>();
    expected.add("A2");
    expected.add("B5");
    Assert.assertEquals(expected, output);
  } else {
    Assert.fail("Job failed");
  }
}
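The Map, Reduce, Combiner, and GroupComparator classes referenced above are inner classes of TestNewCombinerGrouping and are not shown here. As an illustration of the technique (a sketch, not the test's actual implementation), a grouping comparator for Text keys of the form "prefix|suffix" might look like this:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Hypothetical sketch: groups keys such as "A|a" and "A|b" together by
// comparing only the portion before the '|' separator. Assumes every key
// contains a '|'; the real GroupComparator may differ in detail.
public static class GroupComparator extends WritableComparator {
  protected GroupComparator() {
    // 'true' makes WritableComparator instantiate keys so that the
    // object-level compare below receives deserialized Text instances.
    super(Text.class, true);
  }

  @Override
  public int compare(WritableComparable a, WritableComparable b) {
    String ka = a.toString();
    String kb = b.toString();
    return ka.substring(0, ka.indexOf('|'))
        .compareTo(kb.substring(0, kb.indexOf('|')));
  }
}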
Use of java.io.FileReader in project hadoop by apache.
From the class TestMRJobClient, method checkHistoryJSONFileOutput.
private void checkHistoryJSONFileOutput(String jobId,
    ByteArrayOutputStream out, File outFile) throws IOException, JSONException {
  // Read the whole history file into a single string.
  BufferedReader br = new BufferedReader(new FileReader(outFile));
  String line = org.apache.commons.io.IOUtils.toString(br);
  br.close();
  // The JSON must describe the job that was queried, and nothing should
  // have been written to the console stream.
  JSONObject json = new JSONObject(line);
  assertEquals(jobId, json.getString("hadoopJob"));
  assertEquals(0, out.size());
}
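Since commons-io's IOUtils.toString accepts any Reader, the BufferedReader wrapper is not strictly needed here. A more compact equivalent using try-with-resources (a restructuring sketch assuming Java 7+ and commons-io, not the project's code):

// Closes the reader even if toString throws; assumes java.io.Reader,
// java.io.FileReader, and commons-io are available.
String content;
try (Reader r = new FileReader(outFile)) {
  content = org.apache.commons.io.IOUtils.toString(r);
}
JSONObject json = new JSONObject(content);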
Use of java.io.FileReader in project hadoop by apache.
From the class TestMRJobClient, method checkHistoryHumanFileOutput.
private void checkHistoryHumanFileOutput(String jobId,
    ByteArrayOutputStream out, File outFile) throws IOException, JSONException {
  BufferedReader br = new BufferedReader(new FileReader(outFile));
  // Skip the first line; the job id is reported on the second line.
  br.readLine();
  String line = br.readLine();
  br.close();
  assertEquals("Hadoop job: " + jobId, line);
  // Nothing should have been written to the console stream.
  assertEquals(0, out.size());
}
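Both helpers assume the history output has already been written to outFile in the requested format (JSON or human-readable) and that the console stream stayed empty. A hypothetical driver (helper name and setup assumed purely for illustration, not taken from TestMRJobClient):

// Hypothetical call site: run the history command with file output in
// JSON format, then verify the file and the (empty) console stream.
ByteArrayOutputStream out = new ByteArrayOutputStream();
File outFile = new File(tmpDir, "history.out");       // tmpDir: assumed
runHistoryCommand(jobId, outFile, "json", out);       // hypothetical helper
checkHistoryJSONFileOutput(jobId, out, outFile);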
Use of java.io.FileReader in project hadoop by apache.
From the class TestDistributedShell, method getNumOfStringOccurences.
private long getNumOfStringOccurences(File entityFile, String searchString)
    throws IOException {
  BufferedReader reader = null;
  String strLine;
  long actualCount = 0;
  try {
    reader = new BufferedReader(new FileReader(entityFile));
    // Count the lines that contain the search string.
    while ((strLine = reader.readLine()) != null) {
      if (strLine.trim().contains(searchString)) {
        actualCount++;
      }
    }
  } finally {
    // Guard against an NPE when the FileReader constructor itself threw.
    if (reader != null) {
      reader.close();
    }
  }
  return actualCount;
}
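With Java 7+, the same logic reads more simply with try-with-resources, which removes the need for the null check in the finally block (a sketch, not the project's code):

// Equivalent sketch: the reader is closed automatically, even on error.
private long getNumOfStringOccurences(File entityFile, String searchString)
    throws IOException {
  long actualCount = 0;
  try (BufferedReader reader = new BufferedReader(new FileReader(entityFile))) {
    String strLine;
    while ((strLine = reader.readLine()) != null) {
      if (strLine.trim().contains(searchString)) {
        actualCount++;
      }
    }
  }
  return actualCount;
}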
Use of java.io.FileReader in project hadoop by apache.
From the class TestDistributedShell, method verifyContainerLog.
private int verifyContainerLog(int containerNum, List<String> expectedContent,
    boolean count, String expectedWord) {
  File logFolder = new File(yarnCluster.getNodeManager(0).getConfig()
      .get(YarnConfiguration.NM_LOG_DIRS, YarnConfiguration.DEFAULT_NM_LOG_DIRS));
  File[] listOfFiles = logFolder.listFiles();
  // Find the application log directory that holds containerNum + 1
  // container directories (the AM container plus the task containers).
  int currentContainerLogFileIndex = -1;
  for (int i = listOfFiles.length - 1; i >= 0; i--) {
    if (listOfFiles[i].listFiles().length == containerNum + 1) {
      currentContainerLogFileIndex = i;
      break;
    }
  }
  Assert.assertTrue(currentContainerLogFileIndex != -1);
  File[] containerFiles = listOfFiles[currentContainerLogFileIndex].listFiles();
  int numOfWords = 0;
  for (int i = 0; i < containerFiles.length; i++) {
    for (File output : containerFiles[i].listFiles()) {
      if (output.getName().trim().contains("stdout")) {
        BufferedReader br = null;
        List<String> stdOutContent = new ArrayList<String>();
        try {
          String sCurrentLine;
          br = new BufferedReader(new FileReader(output));
          int numOfline = 0;
          while ((sCurrentLine = br.readLine()) != null) {
            if (count) {
              // Counting mode: tally lines containing the expected word.
              if (sCurrentLine.contains(expectedWord)) {
                numOfWords++;
              }
            } else if (output.getName().trim().equals("stdout")) {
              if (!Shell.WINDOWS) {
                // Verify the stdout content line by line.
                Assert.assertEquals("The current is" + sCurrentLine,
                    expectedContent.get(numOfline), sCurrentLine.trim());
                numOfline++;
              } else {
                stdOutContent.add(sCurrentLine.trim());
              }
            }
          }
          /* When a bat script is executed via "cmd /c", all of its output
           * arrives at once, which makes a line-by-line check unreliable.
           * On Windows, simply check that the output from the bat file
           * contains all of the expected messages. */
          if (Shell.WINDOWS && !count
              && output.getName().trim().equals("stdout")) {
            Assert.assertTrue(stdOutContent.containsAll(expectedContent));
          }
        } catch (IOException e) {
          e.printStackTrace();
        } finally {
          try {
            if (br != null) {
              br.close();
            }
          } catch (IOException ex) {
            ex.printStackTrace();
          }
        }
      }
    }
  }
  return numOfWords;
}
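The method has two modes: with count == false it asserts that each container's stdout matches expectedContent line by line, and with count == true it returns the number of stdout lines containing expectedWord. Hypothetical call sites (argument values assumed purely for illustration):

// Exact-match mode: an app with two containers plus the AM, whose stdout
// must equal the expected lines. Assumes java.util.Arrays is imported.
List<String> expectedLines = Arrays.asList("Hello", "World");
verifyContainerLog(2, expectedLines, false, null);

// Counting mode: return how many stdout lines contain the marker word.
int hits = verifyContainerLog(2, null, true, "Hello");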