Use of java.io.BufferedReader in project hadoop by apache.
In the class TestCompressionEmulationUtils, the method testRandomCompressedTextDataGenerator:
/**
 * Test {@link RandomTextDataMapper} via {@link CompressionEmulationUtil}.
 */
@Test
public void testRandomCompressedTextDataGenerator() throws Exception {
  int wordSize = 10;
  int listSize = 20;
  long dataSize = 10 * 1024 * 1024;
  Configuration conf = new Configuration();
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
  // configure the RandomTextDataGenerator to generate desired sized data
  conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, listSize);
  conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, wordSize);
  conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);
  conf.set("mapreduce.job.hdfs-servers", "");
  FileSystem lfs = FileSystem.getLocal(conf);
  // define the test's root temp directory
  Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp"))
      .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
  Path tempDir = new Path(rootTempDir, "TestRandomCompressedTextDataGenr");
  lfs.delete(tempDir, true);
  runDataGenJob(conf, tempDir);
  // validate the output data
  FileStatus[] files =
      lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
  long size = 0;
  long maxLineSize = 0;
  for (FileStatus status : files) {
    InputStream in = CompressionEmulationUtil
        .getPossiblyDecompressedInputStream(status.getPath(), conf, 0);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String line = reader.readLine();
    if (line != null) {
      long lineSize = line.getBytes().length;
      if (lineSize > maxLineSize) {
        maxLineSize = lineSize;
      }
      while (line != null) {
        for (String word : line.split("\\s")) {
          size += word.getBytes().length;
        }
        line = reader.readLine();
      }
    }
    reader.close();
  }
  assertTrue(size >= dataSize);
  assertTrue(size <= dataSize + maxLineSize);
}
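The validation loop above closes each reader only on the success path; if readLine throws, the decompressed stream is leaked. Below is a minimal alternative sketch of the same byte accounting (the maxLineSize bookkeeping is omitted for brevity) using try-with-resources; the helper name countWordBytes is hypothetical and not part of the test.

  // Sums the bytes of all words across the (possibly compressed) output files.
  private static long countWordBytes(FileStatus[] files, Configuration conf) throws IOException {
    long size = 0;
    for (FileStatus status : files) {
      // the reader, and the wrapped possibly-decompressed stream, is closed even on failure
      try (BufferedReader reader = new BufferedReader(new InputStreamReader(
          CompressionEmulationUtil.getPossiblyDecompressedInputStream(status.getPath(), conf, 0)))) {
        String line;
        while ((line = reader.readLine()) != null) {
          for (String word : line.split("\\s")) {
            size += word.getBytes().length;
          }
        }
      }
    }
    return size;
  }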
Use of java.io.BufferedReader in project hadoop by apache.
In the class TestAggregatedLogFormat, the method testContainerLogsFileAccess:
@Test(timeout = 10000)
public void testContainerLogsFileAccess() throws IOException {
  // This test will run only if NativeIO is enabled, as SecureIOUtils requires it.
  Assume.assumeTrue(NativeIO.isAvailable());
  Configuration conf = new Configuration();
  conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
  UserGroupInformation.setConfiguration(conf);
  File workDir = new File(testWorkDir, "testContainerLogsFileAccess1");
  Path remoteAppLogFile = new Path(workDir.getAbsolutePath(), "aggregatedLogFile");
  Path srcFileRoot = new Path(workDir.getAbsolutePath(), "srcFiles");
  String data = "Log File content for container : ";
  // Create files for container1. The log aggregator will try to read log files
  // owned by an illegal user.
  ApplicationId applicationId = ApplicationId.newInstance(1, 1);
  ApplicationAttemptId applicationAttemptId =
      ApplicationAttemptId.newInstance(applicationId, 1);
  ContainerId testContainerId1 = ContainerId.newContainerId(applicationAttemptId, 1);
  Path appDir = new Path(srcFileRoot,
      testContainerId1.getApplicationAttemptId().getApplicationId().toString());
  Path srcFilePath1 = new Path(appDir, testContainerId1.toString());
  String stdout = "stdout";
  String stderr = "stderr";
  writeSrcFile(srcFilePath1, stdout, data + testContainerId1.toString() + stdout);
  writeSrcFile(srcFilePath1, stderr, data + testContainerId1.toString() + stderr);
  UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
  LogWriter logWriter = new LogWriter(conf, remoteAppLogFile, ugi);
  LogKey logKey = new LogKey(testContainerId1);
  String randomUser = "randomUser";
  LogValue logValue = spy(new LogValue(
      Collections.singletonList(srcFileRoot.toString()), testContainerId1, randomUser));
  // Simulate a situation where the first log file is owned by a different user
  // (probably via a symlink) and the second one by the user itself. The first
  // file should not be aggregated because it has an invalid user name.
  when(logValue.getUser()).thenReturn(randomUser).thenReturn(ugi.getShortUserName());
  logWriter.append(logKey, logValue);
  logWriter.close();
  BufferedReader in = new BufferedReader(
      new FileReader(new File(remoteAppLogFile.toUri().getRawPath())));
  String line;
  StringBuffer sb = new StringBuffer("");
  while ((line = in.readLine()) != null) {
    LOG.info(line);
    sb.append(line);
  }
  line = sb.toString();
  String expectedOwner = ugi.getShortUserName();
  if (Path.WINDOWS) {
    final String adminsGroupString = "Administrators";
    if (Arrays.asList(ugi.getGroupNames()).contains(adminsGroupString)) {
      expectedOwner = adminsGroupString;
    }
  }
  // The stderr file should not be aggregated, so its log message must not appear.
  String stdoutFile1 = StringUtils.join(File.separator,
      Arrays.asList(new String[] { workDir.getAbsolutePath(), "srcFiles",
          testContainerId1.getApplicationAttemptId().getApplicationId().toString(),
          testContainerId1.toString(), stderr }));
  // The stdout file is expected to be aggregated.
  String stdoutFile2 = StringUtils.join(File.separator,
      Arrays.asList(new String[] { workDir.getAbsolutePath(), "srcFiles",
          testContainerId1.getApplicationAttemptId().getApplicationId().toString(),
          testContainerId1.toString(), stdout }));
  String message2 = "Owner '" + expectedOwner + "' for path " + stdoutFile2
      + " did not match expected owner '" + ugi.getShortUserName() + "'";
  Assert.assertFalse(line.contains(message2));
  Assert.assertFalse(line.contains(data + testContainerId1.toString() + stderr));
  Assert.assertTrue(line.contains(data + testContainerId1.toString() + stdout));
}
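The calls to writeSrcFile above rely on a helper that is not part of this snippet. A hypothetical stand-in is sketched below: the name and signature are taken from the calls above, but the body (plain java.io on the local filesystem) is an assumption, and the real helper in TestAggregatedLogFormat may create the files differently.

  // Hypothetical helper: writes 'data' into a file named 'fileName' under 'srcFilePath'.
  private static void writeSrcFile(Path srcFilePath, String fileName, String data)
      throws IOException {
    File dir = new File(srcFilePath.toUri().getRawPath());
    if (!dir.exists() && !dir.mkdirs()) {
      throw new IOException("Unable to create directory " + dir);
    }
    try (Writer writer = new OutputStreamWriter(
        new FileOutputStream(new File(dir, fileName)), StandardCharsets.UTF_8)) {
      writer.write(data);
    }
  }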
Use of java.io.BufferedReader in project hadoop by apache.
In the class TestContainersMonitor, the method testContainerKillOnMemoryOverflow:
@Test
public void testContainerKillOnMemoryOverflow()
    throws IOException, InterruptedException, YarnException {
  if (!ProcfsBasedProcessTree.isAvailable()) {
    return;
  }
  containerManager.start();
  File scriptFile = new File(tmpDir, "scriptFile.sh");
  PrintWriter fileWriter = new PrintWriter(scriptFile);
  File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile();
  // So that the start file is readable by the test.
  fileWriter.write("\numask 0");
  fileWriter.write("\necho Hello World! > " + processStartFile);
  fileWriter.write("\necho $$ >> " + processStartFile);
  fileWriter.write("\nsleep 15");
  fileWriter.close();
  ContainerLaunchContext containerLaunchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  // ////// Construct the Container-id
  ApplicationId appId = ApplicationId.newInstance(0, 0);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
  ContainerId cId = ContainerId.newContainerId(appAttemptId, 0);
  URL resource_alpha =
      URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
  LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
  rsrc_alpha.setResource(resource_alpha);
  rsrc_alpha.setSize(-1);
  rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
  rsrc_alpha.setType(LocalResourceType.FILE);
  rsrc_alpha.setTimestamp(scriptFile.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, rsrc_alpha);
  containerLaunchContext.setLocalResources(localResources);
  List<String> commands = new ArrayList<String>();
  commands.add("/bin/bash");
  commands.add(scriptFile.getAbsolutePath());
  containerLaunchContext.setCommands(commands);
  Resource r = BuilderUtils.newResource(0, 0);
  ContainerTokenIdentifier containerIdentifier =
      new ContainerTokenIdentifier(cId, context.getNodeId().toString(), user, r,
          System.currentTimeMillis() + 120000, 123, DUMMY_RM_IDENTIFIER,
          Priority.newInstance(0), 0);
  Token containerToken = BuilderUtils.newContainerToken(context.getNodeId(),
      containerManager.getContext().getContainerTokenSecretManager()
          .createPassword(containerIdentifier), containerIdentifier);
  StartContainerRequest scRequest =
      StartContainerRequest.newInstance(containerLaunchContext, containerToken);
  List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
  list.add(scRequest);
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  containerManager.startContainers(allRequests);
  int timeoutSecs = 0;
  while (!processStartFile.exists() && timeoutSecs++ < 20) {
    Thread.sleep(1000);
    LOG.info("Waiting for process start-file to be created");
  }
  Assert.assertTrue("ProcessStartFile doesn't exist!", processStartFile.exists());
  // Now verify the contents of the file
  BufferedReader reader = new BufferedReader(new FileReader(processStartFile));
  Assert.assertEquals("Hello World!", reader.readLine());
  // Get the pid of the process
  String pid = reader.readLine().trim();
  // No more lines
  Assert.assertEquals(null, reader.readLine());
  BaseContainerManagerTest.waitForContainerState(containerManager, cId,
      ContainerState.COMPLETE, 60);
  List<ContainerId> containerIds = new ArrayList<ContainerId>();
  containerIds.add(cId);
  GetContainerStatusesRequest gcsRequest =
      GetContainerStatusesRequest.newInstance(containerIds);
  ContainerStatus containerStatus =
      containerManager.getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
  Assert.assertEquals(ContainerExitStatus.KILLED_EXCEEDED_VMEM,
      containerStatus.getExitStatus());
  String expectedMsgPattern = "Container \\[pid=" + pid + ",containerID=" + cId
      + "\\] is running beyond virtual memory limits. Current usage: "
      + "[0-9.]+ ?[KMGTPE]?B of [0-9.]+ ?[KMGTPE]?B physical memory used; "
      + "[0-9.]+ ?[KMGTPE]?B of [0-9.]+ ?[KMGTPE]?B virtual memory used. "
      + "Killing container.\nDump of the process-tree for " + cId + " :\n";
  Pattern pat = Pattern.compile(expectedMsgPattern);
  Assert.assertEquals("Expected message pattern is: " + expectedMsgPattern
      + "\n\nObserved message is: " + containerStatus.getDiagnostics(),
      true, pat.matcher(containerStatus.getDiagnostics()).find());
  // Assert that the process is not alive anymore
  Assert.assertFalse("Process is still alive!",
      exec.signalContainer(new ContainerSignalContext.Builder()
          .setUser(user).setPid(pid).setSignal(Signal.NULL).build()));
}
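The two readLine calls above leave the reader open. A small helper using try-with-resources could perform the same checks and hand back the pid; this is only a sketch, and readPidFromStartFile is a hypothetical name, not part of TestContainersMonitor.

  // Reads the marker line and the pid written by the container script, then
  // verifies there is nothing else in the file.
  private static String readPidFromStartFile(File processStartFile) throws IOException {
    try (BufferedReader reader = new BufferedReader(new FileReader(processStartFile))) {
      Assert.assertEquals("Hello World!", reader.readLine());
      String pid = reader.readLine().trim();
      Assert.assertNull("Unexpected extra line in start file", reader.readLine());
      return pid;
    }
  }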
Use of java.io.BufferedReader in project hadoop by apache.
In the class TestClusterMapReduceTestCase, the method _testMapReduce:
public void _testMapReduce(boolean restart) throws Exception {
  OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
  Writer wr = new OutputStreamWriter(os);
  wr.write("hello1\n");
  wr.write("hello2\n");
  wr.write("hello3\n");
  wr.write("hello4\n");
  wr.close();
  if (restart) {
    stopCluster();
    startCluster(false, null);
  }
  JobConf conf = createJobConf();
  conf.setJobName("mr");
  conf.setInputFormat(TextInputFormat.class);
  conf.setMapOutputKeyClass(LongWritable.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(org.apache.hadoop.mapred.lib.IdentityMapper.class);
  conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);
  FileInputFormat.setInputPaths(conf, getInputDir());
  FileOutputFormat.setOutputPath(conf, getOutputDir());
  JobClient.runJob(conf);
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(
      getOutputDir(), new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    int counter = 0;
    while (line != null) {
      counter++;
      assertTrue(line.contains("hello"));
      line = reader.readLine();
    }
    reader.close();
    assertEquals(4, counter);
  }
}
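Since Java 8, BufferedReader also exposes a lines() stream, so the verification loop above can be written more compactly. The sketch below would replace the body of the if block above under the same assumptions (getFileSystem() and outputFiles as in the test; java.util.stream.Collectors is the only addition); it is an alternative, not the code from TestClusterMapReduceTestCase.

  if (outputFiles.length > 0) {
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(getFileSystem().open(outputFiles[0])))) {
      // collect all output lines, then verify their content and count
      List<String> lines = reader.lines().collect(Collectors.toList());
      for (String l : lines) {
        assertTrue(l.contains("hello"));
      }
      assertEquals(4, lines.size());
    }
  }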
Use of java.io.BufferedReader in project hadoop by apache.
In the class TestMapRed, the method printTextFile:
private static void printTextFile(FileSystem fs, Path p) throws IOException {
  BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(p)));
  String line;
  while ((line = in.readLine()) != null) {
    System.out.println(" Row: " + line);
  }
  in.close();
}