Search in sources :

Example 51 with ApplicationId

use of org.apache.hadoop.yarn.api.records.ApplicationId in project hadoop by apache.

the class TestShuffleHandler method testRecoveryFromOtherVersions.

@Test
public void testRecoveryFromOtherVersions() throws IOException {
    final String user = "someuser";
    final ApplicationId appId = ApplicationId.newInstance(12345, 1);
    final File tmpDir = new File(System.getProperty("test.build.data", System.getProperty("java.io.tmpdir")), TestShuffleHandler.class.getName());
    Configuration conf = new Configuration();
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
    conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3);
    ShuffleHandler shuffle = new ShuffleHandler();
    // emulate aux services startup with recovery enabled
    shuffle.setRecoveryPath(new Path(tmpDir.toString()));
    tmpDir.mkdirs();
    try {
        shuffle.init(conf);
        shuffle.start();
        // setup a shuffle token for an application
        DataOutputBuffer outputBuffer = new DataOutputBuffer();
        outputBuffer.reset();
        Token<JobTokenIdentifier> jt = new Token<JobTokenIdentifier>("identifier".getBytes(), "password".getBytes(), new Text(user), new Text("shuffleService"));
        jt.write(outputBuffer);
        shuffle.initializeApplication(new ApplicationInitializationContext(user, appId, ByteBuffer.wrap(outputBuffer.getData(), 0, outputBuffer.getLength())));
        // verify we are authorized to shuffle
        int rc = getShuffleResponseCode(shuffle, jt);
        Assert.assertEquals(HttpURLConnection.HTTP_OK, rc);
        // emulate shuffle handler restart
        shuffle.close();
        shuffle = new ShuffleHandler();
        shuffle.setRecoveryPath(new Path(tmpDir.toString()));
        shuffle.init(conf);
        shuffle.start();
        // verify we are still authorized to shuffle to the old application
        rc = getShuffleResponseCode(shuffle, jt);
        Assert.assertEquals(HttpURLConnection.HTTP_OK, rc);
        Version version = Version.newInstance(1, 0);
        Assert.assertEquals(version, shuffle.getCurrentVersion());
        // emulate shuffle handler restart with compatible version
        Version version11 = Version.newInstance(1, 1);
        // update version info before close shuffle
        shuffle.storeVersion(version11);
        Assert.assertEquals(version11, shuffle.loadVersion());
        shuffle.close();
        shuffle = new ShuffleHandler();
        shuffle.setRecoveryPath(new Path(tmpDir.toString()));
        shuffle.init(conf);
        shuffle.start();
        // shuffle version will be override by CURRENT_VERSION_INFO after restart
        // successfully.
        Assert.assertEquals(version, shuffle.loadVersion());
        // verify we are still authorized to shuffle to the old application
        rc = getShuffleResponseCode(shuffle, jt);
        Assert.assertEquals(HttpURLConnection.HTTP_OK, rc);
        // emulate shuffle handler restart with incompatible version
        Version version21 = Version.newInstance(2, 1);
        shuffle.storeVersion(version21);
        Assert.assertEquals(version21, shuffle.loadVersion());
        shuffle.close();
        shuffle = new ShuffleHandler();
        shuffle.setRecoveryPath(new Path(tmpDir.toString()));
        shuffle.init(conf);
        try {
            shuffle.start();
            Assert.fail("Incompatible version, should expect fail here.");
        } catch (ServiceStateException e) {
            Assert.assertTrue("Exception message mismatch", e.getMessage().contains("Incompatible version for state DB schema:"));
        }
    } finally {
        if (shuffle != null) {
            shuffle.close();
        }
        FileUtil.fullyDelete(tmpDir);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) JobTokenIdentifier(org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier) Token(org.apache.hadoop.security.token.Token) Text(org.apache.hadoop.io.Text) Version(org.apache.hadoop.yarn.server.records.Version) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) ServiceStateException(org.apache.hadoop.service.ServiceStateException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) ApplicationInitializationContext(org.apache.hadoop.yarn.server.api.ApplicationInitializationContext) Test(org.junit.Test)

Example 52 with ApplicationId

use of org.apache.hadoop.yarn.api.records.ApplicationId in project hadoop by apache.

the class TestShuffleHandler method testGetMapOutputInfo.

@Test(timeout = 100000)
public void testGetMapOutputInfo() throws Exception {
    final ArrayList<Throwable> failures = new ArrayList<Throwable>(1);
    Configuration conf = new Configuration();
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
    conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "simple");
    UserGroupInformation.setConfiguration(conf);
    File absLogDir = new File("target", TestShuffleHandler.class.getSimpleName() + "LocDir").getAbsoluteFile();
    conf.set(YarnConfiguration.NM_LOCAL_DIRS, absLogDir.getAbsolutePath());
    ApplicationId appId = ApplicationId.newInstance(12345, 1);
    String appAttemptId = "attempt_12345_1_m_1_0";
    String user = "randomUser";
    String reducerId = "0";
    List<File> fileMap = new ArrayList<File>();
    createShuffleHandlerFiles(absLogDir, user, appId.toString(), appAttemptId, conf, fileMap);
    ShuffleHandler shuffleHandler = new ShuffleHandler() {

        @Override
        protected Shuffle getShuffle(Configuration conf) {
            // replace the shuffle handler with one stubbed for testing
            return new Shuffle(conf) {

                @Override
                protected void populateHeaders(List<String> mapIds, String outputBaseStr, String user, int reduce, HttpRequest request, HttpResponse response, boolean keepAliveParam, Map<String, MapOutputInfo> infoMap) throws IOException {
                    // Only set response headers and skip everything else
                    // send some dummy value for content-length
                    super.setResponseHeaders(response, keepAliveParam, 100);
                }

                @Override
                protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, HttpResponse response, URL requestUri) throws IOException {
                // Do nothing.
                }

                @Override
                protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) {
                    if (failures.size() == 0) {
                        failures.add(new Error(message));
                        ctx.getChannel().close();
                    }
                }

                @Override
                protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, String user, String mapId, int reduce, MapOutputInfo info) throws IOException {
                    // send a shuffle header
                    ShuffleHeader header = new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1);
                    DataOutputBuffer dob = new DataOutputBuffer();
                    header.write(dob);
                    return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
                }
            };
        }
    };
    shuffleHandler.init(conf);
    try {
        shuffleHandler.start();
        DataOutputBuffer outputBuffer = new DataOutputBuffer();
        outputBuffer.reset();
        Token<JobTokenIdentifier> jt = new Token<JobTokenIdentifier>("identifier".getBytes(), "password".getBytes(), new Text(user), new Text("shuffleService"));
        jt.write(outputBuffer);
        shuffleHandler.initializeApplication(new ApplicationInitializationContext(user, appId, ByteBuffer.wrap(outputBuffer.getData(), 0, outputBuffer.getLength())));
        URL url = new URL("http://127.0.0.1:" + shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY) + "/mapOutput?job=job_12345_0001&reduce=" + reducerId + "&map=attempt_12345_1_m_1_0");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
        conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
        conn.connect();
        try {
            DataInputStream is = new DataInputStream(conn.getInputStream());
            ShuffleHeader header = new ShuffleHeader();
            header.readFields(is);
            is.close();
        } catch (EOFException e) {
        // ignore
        }
        Assert.assertEquals("sendError called due to shuffle error", 0, failures.size());
    } finally {
        shuffleHandler.stop();
        FileUtil.fullyDelete(absLogDir);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) ArrayList(java.util.ArrayList) ChannelHandlerContext(org.jboss.netty.channel.ChannelHandlerContext) Token(org.apache.hadoop.security.token.Token) URL(java.net.URL) HttpURLConnection(java.net.HttpURLConnection) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) EOFException(java.io.EOFException) List(java.util.List) ArrayList(java.util.ArrayList) ApplicationInitializationContext(org.apache.hadoop.yarn.server.api.ApplicationInitializationContext) HttpRequest(org.jboss.netty.handler.codec.http.HttpRequest) SocketChannel(org.jboss.netty.channel.socket.SocketChannel) Channel(org.jboss.netty.channel.Channel) AbstractChannel(org.jboss.netty.channel.AbstractChannel) ShuffleHeader(org.apache.hadoop.mapreduce.task.reduce.ShuffleHeader) DefaultHttpResponse(org.jboss.netty.handler.codec.http.DefaultHttpResponse) HttpResponse(org.jboss.netty.handler.codec.http.HttpResponse) JobTokenIdentifier(org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier) Text(org.apache.hadoop.io.Text) DataInputStream(java.io.DataInputStream) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) Map(java.util.Map) Test(org.junit.Test)

Example 53 with ApplicationId

use of org.apache.hadoop.yarn.api.records.ApplicationId in project hadoop by apache.

the class TestShuffleHandler method testRecovery.

@Test
public void testRecovery() throws IOException {
    final String user = "someuser";
    final ApplicationId appId = ApplicationId.newInstance(12345, 1);
    final JobID jobId = JobID.downgrade(TypeConverter.fromYarn(appId));
    final File tmpDir = new File(System.getProperty("test.build.data", System.getProperty("java.io.tmpdir")), TestShuffleHandler.class.getName());
    Configuration conf = new Configuration();
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
    conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3);
    ShuffleHandler shuffle = new ShuffleHandler();
    // emulate aux services startup with recovery enabled
    shuffle.setRecoveryPath(new Path(tmpDir.toString()));
    tmpDir.mkdirs();
    try {
        shuffle.init(conf);
        shuffle.start();
        // setup a shuffle token for an application
        DataOutputBuffer outputBuffer = new DataOutputBuffer();
        outputBuffer.reset();
        Token<JobTokenIdentifier> jt = new Token<JobTokenIdentifier>("identifier".getBytes(), "password".getBytes(), new Text(user), new Text("shuffleService"));
        jt.write(outputBuffer);
        shuffle.initializeApplication(new ApplicationInitializationContext(user, appId, ByteBuffer.wrap(outputBuffer.getData(), 0, outputBuffer.getLength())));
        // verify we are authorized to shuffle
        int rc = getShuffleResponseCode(shuffle, jt);
        Assert.assertEquals(HttpURLConnection.HTTP_OK, rc);
        // emulate shuffle handler restart
        shuffle.close();
        shuffle = new ShuffleHandler();
        shuffle.setRecoveryPath(new Path(tmpDir.toString()));
        shuffle.init(conf);
        shuffle.start();
        // verify we are still authorized to shuffle to the old application
        rc = getShuffleResponseCode(shuffle, jt);
        Assert.assertEquals(HttpURLConnection.HTTP_OK, rc);
        // shutdown app and verify access is lost
        shuffle.stopApplication(new ApplicationTerminationContext(appId));
        rc = getShuffleResponseCode(shuffle, jt);
        Assert.assertEquals(HttpURLConnection.HTTP_UNAUTHORIZED, rc);
        // emulate shuffle handler restart
        shuffle.close();
        shuffle = new ShuffleHandler();
        shuffle.setRecoveryPath(new Path(tmpDir.toString()));
        shuffle.init(conf);
        shuffle.start();
        // verify we still don't have access
        rc = getShuffleResponseCode(shuffle, jt);
        Assert.assertEquals(HttpURLConnection.HTTP_UNAUTHORIZED, rc);
    } finally {
        if (shuffle != null) {
            shuffle.close();
        }
        FileUtil.fullyDelete(tmpDir);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) JobTokenIdentifier(org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier) Token(org.apache.hadoop.security.token.Token) Text(org.apache.hadoop.io.Text) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) ApplicationInitializationContext(org.apache.hadoop.yarn.server.api.ApplicationInitializationContext) ApplicationTerminationContext(org.apache.hadoop.yarn.server.api.ApplicationTerminationContext) Test(org.junit.Test)

Example 54 with ApplicationId

use of org.apache.hadoop.yarn.api.records.ApplicationId in project hadoop by apache.

the class TestShuffleHandler method testMapFileAccess.

/**
   * Validate the ownership of the map-output files being pulled in. The
   * local-file-system owner of the file should match the user component in the
   *
   * @throws Exception exception
   */
@Test(timeout = 100000)
public void testMapFileAccess() throws IOException {
    // This will run only in NativeIO is enabled as SecureIOUtils need it
    assumeTrue(NativeIO.isAvailable());
    Configuration conf = new Configuration();
    conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0);
    conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    UserGroupInformation.setConfiguration(conf);
    File absLogDir = new File("target", TestShuffleHandler.class.getSimpleName() + "LocDir").getAbsoluteFile();
    conf.set(YarnConfiguration.NM_LOCAL_DIRS, absLogDir.getAbsolutePath());
    ApplicationId appId = ApplicationId.newInstance(12345, 1);
    LOG.info(appId.toString());
    String appAttemptId = "attempt_12345_1_m_1_0";
    String user = "randomUser";
    String reducerId = "0";
    List<File> fileMap = new ArrayList<File>();
    createShuffleHandlerFiles(absLogDir, user, appId.toString(), appAttemptId, conf, fileMap);
    ShuffleHandler shuffleHandler = new ShuffleHandler() {

        @Override
        protected Shuffle getShuffle(Configuration conf) {
            // replace the shuffle handler with one stubbed for testing
            return new Shuffle(conf) {

                @Override
                protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, HttpResponse response, URL requestUri) throws IOException {
                // Do nothing.
                }
            };
        }
    };
    shuffleHandler.init(conf);
    try {
        shuffleHandler.start();
        DataOutputBuffer outputBuffer = new DataOutputBuffer();
        outputBuffer.reset();
        Token<JobTokenIdentifier> jt = new Token<JobTokenIdentifier>("identifier".getBytes(), "password".getBytes(), new Text(user), new Text("shuffleService"));
        jt.write(outputBuffer);
        shuffleHandler.initializeApplication(new ApplicationInitializationContext(user, appId, ByteBuffer.wrap(outputBuffer.getData(), 0, outputBuffer.getLength())));
        URL url = new URL("http://127.0.0.1:" + shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY) + "/mapOutput?job=job_12345_0001&reduce=" + reducerId + "&map=attempt_12345_1_m_1_0");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
        conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
        conn.connect();
        byte[] byteArr = new byte[10000];
        try {
            DataInputStream is = new DataInputStream(conn.getInputStream());
            is.readFully(byteArr);
        } catch (EOFException e) {
        // ignore
        }
        // Retrieve file owner name
        FileInputStream is = new FileInputStream(fileMap.get(0));
        String owner = NativeIO.POSIX.getFstat(is.getFD()).getOwner();
        is.close();
        String message = "Owner '" + owner + "' for path " + fileMap.get(0).getAbsolutePath() + " did not match expected owner '" + user + "'";
        Assert.assertTrue((new String(byteArr)).contains(message));
    } finally {
        shuffleHandler.stop();
        FileUtil.fullyDelete(absLogDir);
    }
}
Also used : HttpRequest(org.jboss.netty.handler.codec.http.HttpRequest) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ArrayList(java.util.ArrayList) DefaultHttpResponse(org.jboss.netty.handler.codec.http.DefaultHttpResponse) HttpResponse(org.jboss.netty.handler.codec.http.HttpResponse) JobTokenIdentifier(org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier) ChannelHandlerContext(org.jboss.netty.channel.ChannelHandlerContext) Token(org.apache.hadoop.security.token.Token) Text(org.apache.hadoop.io.Text) DataInputStream(java.io.DataInputStream) URL(java.net.URL) FileInputStream(java.io.FileInputStream) HttpURLConnection(java.net.HttpURLConnection) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) EOFException(java.io.EOFException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) ApplicationInitializationContext(org.apache.hadoop.yarn.server.api.ApplicationInitializationContext) Test(org.junit.Test)

Example 55 with ApplicationId

use of org.apache.hadoop.yarn.api.records.ApplicationId in project hadoop by apache.

the class ShuffleHandler method stopApplication.

@Override
public void stopApplication(ApplicationTerminationContext context) {
    ApplicationId appId = context.getApplicationId();
    JobID jobId = new JobID(Long.toString(appId.getClusterTimestamp()), appId.getId());
    try {
        removeJobShuffleInfo(jobId);
    } catch (IOException e) {
        LOG.error("Error during stopApp", e);
    // TODO add API to AuxiliaryServices to report failures
    }
}
Also used : IOException(java.io.IOException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Aggregations

ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)648 Test (org.junit.Test)338 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)221 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)173 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)137 Configuration (org.apache.hadoop.conf.Configuration)127 IOException (java.io.IOException)119 Path (org.apache.hadoop.fs.Path)107 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)102 ArrayList (java.util.ArrayList)96 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)71 HashMap (java.util.HashMap)65 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)61 Resource (org.apache.hadoop.yarn.api.records.Resource)57 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)53 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)51 File (java.io.File)49 Credentials (org.apache.hadoop.security.Credentials)49 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)47 ApplicationSubmissionContext (org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext)47