use of org.apache.hadoop.io.DataOutputBuffer in project hadoop by apache.
the class TestMRSequenceFileAsBinaryOutputFormat method testBinary.
@Test
public void testBinary() throws IOException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
Random r = new Random();
long seed = r.nextLong();
r.setSeed(seed);
FileOutputFormat.setOutputPath(job, outdir);
SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
BytesWritable bkey = new BytesWritable();
BytesWritable bval = new BytesWritable();
TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
OutputCommitter committer = outputFormat.getOutputCommitter(context);
committer.setupJob(job);
RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
IntWritable iwritable = new IntWritable();
DoubleWritable dwritable = new DoubleWritable();
DataOutputBuffer outbuf = new DataOutputBuffer();
LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
try {
for (int i = 0; i < RECORDS; ++i) {
iwritable = new IntWritable(r.nextInt());
iwritable.write(outbuf);
bkey.set(outbuf.getData(), 0, outbuf.getLength());
outbuf.reset();
dwritable = new DoubleWritable(r.nextDouble());
dwritable.write(outbuf);
bval.set(outbuf.getData(), 0, outbuf.getLength());
outbuf.reset();
writer.write(bkey, bval);
}
} finally {
writer.close(context);
}
committer.commitTask(context);
committer.commitJob(job);
InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
int count = 0;
r.setSeed(seed);
SequenceFileInputFormat.setInputPaths(job, outdir);
LOG.info("Reading data by SequenceFileInputFormat");
for (InputSplit split : iformat.getSplits(job)) {
RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
reader.initialize(split, mcontext);
try {
int sourceInt;
double sourceDouble;
while (reader.nextKeyValue()) {
sourceInt = r.nextInt();
sourceDouble = r.nextDouble();
iwritable = reader.getCurrentKey();
dwritable = reader.getCurrentValue();
assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
++count;
}
} finally {
reader.close();
}
}
assertEquals("Some records not found", RECORDS, count);
}
use of org.apache.hadoop.io.DataOutputBuffer in project hadoop by apache.
the class TestShuffleHandler method testRecoveryFromOtherVersions.
@Test
public void testRecoveryFromOtherVersions() throws IOException {
final String user = "someuser";
final ApplicationId appId = ApplicationId.newInstance(12345, 1);
final File tmpDir = new File(System.getProperty("test.build.data", System.getProperty("java.io.tmpdir")), TestShuffleHandler.class.getName());
Configuration conf = new Configuration();
conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3);
ShuffleHandler shuffle = new ShuffleHandler();
// emulate aux services startup with recovery enabled
shuffle.setRecoveryPath(new Path(tmpDir.toString()));
tmpDir.mkdirs();
try {
shuffle.init(conf);
shuffle.start();
// setup a shuffle token for an application
DataOutputBuffer outputBuffer = new DataOutputBuffer();
outputBuffer.reset();
Token<JobTokenIdentifier> jt = new Token<JobTokenIdentifier>("identifier".getBytes(), "password".getBytes(), new Text(user), new Text("shuffleService"));
jt.write(outputBuffer);
shuffle.initializeApplication(new ApplicationInitializationContext(user, appId, ByteBuffer.wrap(outputBuffer.getData(), 0, outputBuffer.getLength())));
// verify we are authorized to shuffle
int rc = getShuffleResponseCode(shuffle, jt);
Assert.assertEquals(HttpURLConnection.HTTP_OK, rc);
// emulate shuffle handler restart
shuffle.close();
shuffle = new ShuffleHandler();
shuffle.setRecoveryPath(new Path(tmpDir.toString()));
shuffle.init(conf);
shuffle.start();
// verify we are still authorized to shuffle to the old application
rc = getShuffleResponseCode(shuffle, jt);
Assert.assertEquals(HttpURLConnection.HTTP_OK, rc);
Version version = Version.newInstance(1, 0);
Assert.assertEquals(version, shuffle.getCurrentVersion());
// emulate shuffle handler restart with compatible version
Version version11 = Version.newInstance(1, 1);
// update version info before close shuffle
shuffle.storeVersion(version11);
Assert.assertEquals(version11, shuffle.loadVersion());
shuffle.close();
shuffle = new ShuffleHandler();
shuffle.setRecoveryPath(new Path(tmpDir.toString()));
shuffle.init(conf);
shuffle.start();
// shuffle version will be override by CURRENT_VERSION_INFO after restart
// successfully.
Assert.assertEquals(version, shuffle.loadVersion());
// verify we are still authorized to shuffle to the old application
rc = getShuffleResponseCode(shuffle, jt);
Assert.assertEquals(HttpURLConnection.HTTP_OK, rc);
// emulate shuffle handler restart with incompatible version
Version version21 = Version.newInstance(2, 1);
shuffle.storeVersion(version21);
Assert.assertEquals(version21, shuffle.loadVersion());
shuffle.close();
shuffle = new ShuffleHandler();
shuffle.setRecoveryPath(new Path(tmpDir.toString()));
shuffle.init(conf);
try {
shuffle.start();
Assert.fail("Incompatible version, should expect fail here.");
} catch (ServiceStateException e) {
Assert.assertTrue("Exception message mismatch", e.getMessage().contains("Incompatible version for state DB schema:"));
}
} finally {
if (shuffle != null) {
shuffle.close();
}
FileUtil.fullyDelete(tmpDir);
}
}
use of org.apache.hadoop.io.DataOutputBuffer in project hadoop by apache.
the class TestShuffleHandler method testKeepAlive.
@Test(timeout = 10000)
public void testKeepAlive() throws Exception {
final ArrayList<Throwable> failures = new ArrayList<Throwable>(1);
Configuration conf = new Configuration();
conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
conf.setBoolean(ShuffleHandler.SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED, true);
// try setting to -ve keep alive timeout.
conf.setInt(ShuffleHandler.SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT, -100);
ShuffleHandler shuffleHandler = new ShuffleHandler() {
@Override
protected Shuffle getShuffle(final Configuration conf) {
// replace the shuffle handler with one stubbed for testing
return new Shuffle(conf) {
@Override
protected MapOutputInfo getMapOutputInfo(String mapId, int reduce, String jobId, String user) throws IOException {
return null;
}
@Override
protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, HttpResponse response, URL requestUri) throws IOException {
}
@Override
protected void populateHeaders(List<String> mapIds, String jobId, String user, int reduce, HttpRequest request, HttpResponse response, boolean keepAliveParam, Map<String, MapOutputInfo> infoMap) throws IOException {
// Send some dummy data (populate content length details)
ShuffleHeader header = new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1);
DataOutputBuffer dob = new DataOutputBuffer();
header.write(dob);
dob = new DataOutputBuffer();
for (int i = 0; i < 100000; ++i) {
header.write(dob);
}
long contentLength = dob.getLength();
// disable connectinKeepAliveEnabled if keepAliveParam is available
if (keepAliveParam) {
connectionKeepAliveEnabled = false;
}
super.setResponseHeaders(response, keepAliveParam, contentLength);
}
@Override
protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, String user, String mapId, int reduce, MapOutputInfo info) throws IOException {
HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK);
// send a shuffle header and a lot of data down the channel
// to trigger a broken pipe
ShuffleHeader header = new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1);
DataOutputBuffer dob = new DataOutputBuffer();
header.write(dob);
ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
dob = new DataOutputBuffer();
for (int i = 0; i < 100000; ++i) {
header.write(dob);
}
return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
}
@Override
protected void sendError(ChannelHandlerContext ctx, HttpResponseStatus status) {
if (failures.size() == 0) {
failures.add(new Error());
ctx.getChannel().close();
}
}
@Override
protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) {
if (failures.size() == 0) {
failures.add(new Error());
ctx.getChannel().close();
}
}
};
}
};
shuffleHandler.init(conf);
shuffleHandler.start();
String shuffleBaseURL = "http://127.0.0.1:" + shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY);
URL url = new URL(shuffleBaseURL + "/mapOutput?job=job_12345_1&reduce=1&" + "map=attempt_12345_1_m_1_0");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
conn.connect();
DataInputStream input = new DataInputStream(conn.getInputStream());
Assert.assertEquals(HttpHeader.KEEP_ALIVE.asString(), conn.getHeaderField(HttpHeader.CONNECTION.asString()));
Assert.assertEquals("timeout=1", conn.getHeaderField(HttpHeader.KEEP_ALIVE.asString()));
Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
ShuffleHeader header = new ShuffleHeader();
header.readFields(input);
input.close();
// For keepAlive via URL
url = new URL(shuffleBaseURL + "/mapOutput?job=job_12345_1&reduce=1&" + "map=attempt_12345_1_m_1_0&keepAlive=true");
conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
conn.connect();
input = new DataInputStream(conn.getInputStream());
Assert.assertEquals(HttpHeader.KEEP_ALIVE.asString(), conn.getHeaderField(HttpHeader.CONNECTION.asString()));
Assert.assertEquals("timeout=1", conn.getHeaderField(HttpHeader.KEEP_ALIVE.asString()));
Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
header = new ShuffleHeader();
header.readFields(input);
input.close();
}
use of org.apache.hadoop.io.DataOutputBuffer in project hadoop by apache.
the class TestShuffleHandler method testGetMapOutputInfo.
@Test(timeout = 100000)
public void testGetMapOutputInfo() throws Exception {
final ArrayList<Throwable> failures = new ArrayList<Throwable>(1);
Configuration conf = new Configuration();
conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3);
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "simple");
UserGroupInformation.setConfiguration(conf);
File absLogDir = new File("target", TestShuffleHandler.class.getSimpleName() + "LocDir").getAbsoluteFile();
conf.set(YarnConfiguration.NM_LOCAL_DIRS, absLogDir.getAbsolutePath());
ApplicationId appId = ApplicationId.newInstance(12345, 1);
String appAttemptId = "attempt_12345_1_m_1_0";
String user = "randomUser";
String reducerId = "0";
List<File> fileMap = new ArrayList<File>();
createShuffleHandlerFiles(absLogDir, user, appId.toString(), appAttemptId, conf, fileMap);
ShuffleHandler shuffleHandler = new ShuffleHandler() {
@Override
protected Shuffle getShuffle(Configuration conf) {
// replace the shuffle handler with one stubbed for testing
return new Shuffle(conf) {
@Override
protected void populateHeaders(List<String> mapIds, String outputBaseStr, String user, int reduce, HttpRequest request, HttpResponse response, boolean keepAliveParam, Map<String, MapOutputInfo> infoMap) throws IOException {
// Only set response headers and skip everything else
// send some dummy value for content-length
super.setResponseHeaders(response, keepAliveParam, 100);
}
@Override
protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, HttpResponse response, URL requestUri) throws IOException {
// Do nothing.
}
@Override
protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) {
if (failures.size() == 0) {
failures.add(new Error(message));
ctx.getChannel().close();
}
}
@Override
protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, String user, String mapId, int reduce, MapOutputInfo info) throws IOException {
// send a shuffle header
ShuffleHeader header = new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1);
DataOutputBuffer dob = new DataOutputBuffer();
header.write(dob);
return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
}
};
}
};
shuffleHandler.init(conf);
try {
shuffleHandler.start();
DataOutputBuffer outputBuffer = new DataOutputBuffer();
outputBuffer.reset();
Token<JobTokenIdentifier> jt = new Token<JobTokenIdentifier>("identifier".getBytes(), "password".getBytes(), new Text(user), new Text("shuffleService"));
jt.write(outputBuffer);
shuffleHandler.initializeApplication(new ApplicationInitializationContext(user, appId, ByteBuffer.wrap(outputBuffer.getData(), 0, outputBuffer.getLength())));
URL url = new URL("http://127.0.0.1:" + shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY) + "/mapOutput?job=job_12345_0001&reduce=" + reducerId + "&map=attempt_12345_1_m_1_0");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
conn.connect();
try {
DataInputStream is = new DataInputStream(conn.getInputStream());
ShuffleHeader header = new ShuffleHeader();
header.readFields(is);
is.close();
} catch (EOFException e) {
// ignore
}
Assert.assertEquals("sendError called due to shuffle error", 0, failures.size());
} finally {
shuffleHandler.stop();
FileUtil.fullyDelete(absLogDir);
}
}
use of org.apache.hadoop.io.DataOutputBuffer in project hadoop by apache.
the class TestShuffleHandler method testClientClosesConnection.
/**
* Verify client prematurely closing a connection.
*
* @throws Exception exception.
*/
@Test(timeout = 10000)
public void testClientClosesConnection() throws Exception {
final ArrayList<Throwable> failures = new ArrayList<Throwable>(1);
Configuration conf = new Configuration();
conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
ShuffleHandler shuffleHandler = new ShuffleHandler() {
@Override
protected Shuffle getShuffle(Configuration conf) {
// replace the shuffle handler with one stubbed for testing
return new Shuffle(conf) {
@Override
protected MapOutputInfo getMapOutputInfo(String mapId, int reduce, String jobId, String user) throws IOException {
return null;
}
@Override
protected void populateHeaders(List<String> mapIds, String jobId, String user, int reduce, HttpRequest request, HttpResponse response, boolean keepAliveParam, Map<String, MapOutputInfo> infoMap) throws IOException {
// Only set response headers and skip everything else
// send some dummy value for content-length
super.setResponseHeaders(response, keepAliveParam, 100);
}
@Override
protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, HttpResponse response, URL requestUri) throws IOException {
}
@Override
protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, String user, String mapId, int reduce, MapOutputInfo info) throws IOException {
// send a shuffle header and a lot of data down the channel
// to trigger a broken pipe
ShuffleHeader header = new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1);
DataOutputBuffer dob = new DataOutputBuffer();
header.write(dob);
ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
dob = new DataOutputBuffer();
for (int i = 0; i < 100000; ++i) {
header.write(dob);
}
return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
}
@Override
protected void sendError(ChannelHandlerContext ctx, HttpResponseStatus status) {
if (failures.size() == 0) {
failures.add(new Error());
ctx.getChannel().close();
}
}
@Override
protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) {
if (failures.size() == 0) {
failures.add(new Error());
ctx.getChannel().close();
}
}
};
}
};
shuffleHandler.init(conf);
shuffleHandler.start();
// simulate a reducer that closes early by reading a single shuffle header
// then closing the connection
URL url = new URL("http://127.0.0.1:" + shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY) + "/mapOutput?job=job_12345_1&reduce=1&map=attempt_12345_1_m_1_0");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
conn.connect();
DataInputStream input = new DataInputStream(conn.getInputStream());
Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
Assert.assertEquals("close", conn.getHeaderField(HttpHeader.CONNECTION.asString()));
ShuffleHeader header = new ShuffleHeader();
header.readFields(input);
input.close();
shuffleHandler.stop();
Assert.assertTrue("sendError called when client closed connection", failures.size() == 0);
}
Aggregations