Search in sources :

Example 86 with DataInputBuffer

use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache.

the class TestBlockToken method testEmptyLegacyBlockTokenBytesIsLegacy.

/**
 * Serializes an empty {@code BlockTokenIdentifier} in the legacy format and
 * checks that the resulting bytes are rejected by the protobuf reader and
 * are read by {@code readFields} into the same token the explicit legacy
 * reader produces.
 *
 * @throws IOException if writing or reading the identifier fails unexpectedly
 */
@Test
public void testEmptyLegacyBlockTokenBytesIsLegacy() throws IOException {
    BlockTokenIdentifier emptyIdent = new BlockTokenIdentifier();
    DataOutputBuffer dob = new DataOutputBuffer(4096);
    DataInputBuffer dib = new DataInputBuffer();
    emptyIdent.writeLegacy(dob);
    // Keep only the first getLength() bytes of the output buffer's data.
    byte[] emptyIdentBytes = Arrays.copyOf(dob.getData(), dob.getLength());
    BlockTokenIdentifier legacyToken = new BlockTokenIdentifier();
    BlockTokenIdentifier protobufToken = new BlockTokenIdentifier();
    BlockTokenIdentifier readToken = new BlockTokenIdentifier();
    // Reference result: parse the bytes with the explicit legacy reader.
    dib.reset(emptyIdentBytes, emptyIdentBytes.length);
    legacyToken.readFieldsLegacy(dib);
    // The same bytes must make the explicit protobuf reader throw.
    boolean invalidProtobufMessage = false;
    try {
        dib.reset(emptyIdentBytes, emptyIdentBytes.length);
        protobufToken.readFieldsProtobuf(dib);
    } catch (IOException e) {
        invalidProtobufMessage = true;
    }
    assertTrue(invalidProtobufMessage);
    dib.reset(emptyIdentBytes, emptyIdentBytes.length);
    readToken.readFields(dib);
    // Previously this re-asserted invalidProtobufMessage, which left the
    // result of readFields unchecked; compare it with the legacy parse instead.
    assertEquals(legacyToken, readToken);
}
Also used : DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) IOException(java.io.IOException) Test(org.junit.Test)

Example 87 with DataInputBuffer

use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache.

the class TestBlockToken method testLegacyBlockTokenBytesIsLegacy.

/**
 * Generates a token from a secret manager created with {@code useProto}
 * set to false and checks that its identifier bytes are rejected by the
 * protobuf reader while {@code readFields} yields the same token as the
 * explicit legacy reader.
 *
 * @throws IOException if reading the identifier fails unexpectedly
 */
@Test
public void testLegacyBlockTokenBytesIsLegacy() throws IOException {
    final boolean useProto = false;
    BlockTokenSecretManager secretManager = new BlockTokenSecretManager(blockKeyUpdateInterval, blockTokenLifetime, 0, 1, "fake-pool", null, useProto);
    Token<BlockTokenIdentifier> generated = secretManager.generateToken(block1, EnumSet.noneOf(BlockTokenIdentifier.AccessMode.class));
    final byte[] identifierBytes = generated.getIdentifier();
    final int identifierLength = identifierBytes.length;
    DataInputBuffer input = new DataInputBuffer();

    // Reference result: parse the bytes with the explicit legacy reader.
    BlockTokenIdentifier legacyToken = new BlockTokenIdentifier();
    input.reset(identifierBytes, identifierLength);
    legacyToken.readFieldsLegacy(input);

    // The explicit protobuf reader is expected to throw on these bytes.
    BlockTokenIdentifier protobufToken = new BlockTokenIdentifier();
    boolean protobufRejected;
    try {
        input.reset(identifierBytes, identifierLength);
        protobufToken.readFieldsProtobuf(input);
        protobufRejected = false;
    } catch (IOException expected) {
        protobufRejected = true;
    }
    assertTrue(protobufRejected);

    // With useProto disabled, readFields must agree with the legacy parse
    // and differ from the token left behind by the failed protobuf parse.
    BlockTokenIdentifier readToken = new BlockTokenIdentifier();
    input.reset(identifierBytes, identifierLength);
    readToken.readFields(input);
    assertEquals(legacyToken, readToken);
    assertNotEquals(protobufToken, readToken);
}
Also used : DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) IOException(java.io.IOException) Test(org.junit.Test)

Example 88 with DataInputBuffer

use of org.apache.hadoop.io.DataInputBuffer in project tez by apache.

the class ContainerRunnerImpl method submitWork.

/**
 * Accepts a complete work unit (containerId + TaskSpec) and hands it to the
 * executor service. This entry point is intended for a task push from the AM.
 *
 * <p>Before submission the application-specific local directories are
 * created, the credentials carried by the request are read, and the
 * application is registered with the ShuffleHandler.
 *
 * @param request the work submission request
 * @throws TezException if a local directory cannot be created or the
 *         credentials in the request cannot be read
 */
@Override
public void submitWork(SubmitWorkRequestProto request) throws TezException {
    LOG.info("Queuing work for execution: " + request);
    checkAndThrowExceptionForTests(request);

    final String user = request.getUser();
    final String applicationId = request.getApplicationIdString();

    // Start from the local environment and record the requesting user.
    final Map<String, String> env = new HashMap<String, String>(localEnv);
    env.put(ApplicationConstants.Environment.USER.name(), user);

    // Derive one application-specific directory per base dir and create it.
    final String[] localDirs = new String[localDirsBase.length];
    int slot = 0;
    for (String baseDir : localDirsBase) {
        final String appDir = createAppSpecificLocalDir(baseDir, applicationId, user);
        localDirs[slot++] = appDir;
        try {
            localFs.mkdirs(new Path(appDir));
        } catch (IOException e) {
            throw new TezException(e);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Dirs are: " + Arrays.toString(localDirs));
    }

    // The working dir is otherwise set up as Environment.PWD. It is used for
    // re-localization, to add the user specified configuration (conf_pb_binary_stream).
    final String workingDir = localDirs[0];

    // Read the credentials shipped with the request.
    final byte[] credentialBytes = request.getCredentialsBinary().toByteArray();
    final DataInputBuffer credentialInput = new DataInputBuffer();
    credentialInput.reset(credentialBytes, credentialBytes.length);
    final Credentials credentials = new Credentials();
    try {
        credentials.readTokenStorageStream(credentialInput);
    } catch (IOException e) {
        throw new TezException(e);
    }
    final Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);

    // TODO Unregistering does not happen at the moment, since there are no signals on when an app completes.
    LOG.info("Registering request with the ShuffleHandler for containerId {}", request.getContainerIdString());
    ShuffleHandler.get().registerApplication(applicationId, jobToken, user);

    final Configuration taskConf = new Configuration(getConfig());
    final ExecutionContextImpl executionContext = new ExecutionContextImpl(localAddress.get().getHostName());
    final TaskRunnerCallable callable = new TaskRunnerCallable(request, taskConf, executionContext, env, localDirs, workingDir, credentials, memoryPerExecutor, sharedExecutor);
    final ListenableFuture<ContainerExecutionResult> future = executorService.submit(callable);
    Futures.addCallback(future, new TaskRunnerCallback(request, callable));
}
Also used : Path(org.apache.hadoop.fs.Path) TezException(org.apache.tez.dag.api.TezException) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) HashMap(java.util.HashMap) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) IOException(java.io.IOException) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) Credentials(org.apache.hadoop.security.Credentials) ContainerExecutionResult(org.apache.tez.runtime.task.TezChild.ContainerExecutionResult)

Example 89 with DataInputBuffer

use of org.apache.hadoop.io.DataInputBuffer in project tez by apache.

the class ContainerRunnerImpl method queueContainer.

/**
 * Queues a container which is ready for running. The regular pull
 * mechanism will be used to fetch work from the AM.
 *
 * <p>Before queuing, the application-specific local directories are
 * created, the credentials carried by the request are read, and the
 * application is registered with the ShuffleHandler.
 *
 * @param request the container run request
 * @throws TezException if a local directory cannot be created or the
 *         credentials in the request cannot be read
 */
@Override
public void queueContainer(RunContainerRequestProto request) throws TezException {
    LOG.info("Queuing container for execution: " + request);

    final String user = request.getUser();
    final String applicationId = request.getApplicationIdString();

    // Start from the local environment and record the requesting user.
    final Map<String, String> env = new HashMap<String, String>(localEnv);
    env.put(ApplicationConstants.Environment.USER.name(), user);

    // Derive one application-specific directory per base dir and create it.
    final String[] localDirs = new String[localDirsBase.length];
    int slot = 0;
    for (String baseDir : localDirsBase) {
        final String appDir = createAppSpecificLocalDir(baseDir, applicationId, user);
        localDirs[slot++] = appDir;
        try {
            localFs.mkdirs(new Path(appDir));
        } catch (IOException e) {
            throw new TezException(e);
        }
    }
    LOG.info("Dirs for {} are {}", request.getContainerIdString(), Arrays.toString(localDirs));

    // The working dir is otherwise set up as Environment.PWD. It is used for
    // re-localization, to add the user specified configuration (conf_pb_binary_stream).
    final String workingDir = localDirs[0];

    // Read the credentials shipped with the request.
    final byte[] credentialBytes = request.getCredentialsBinary().toByteArray();
    final DataInputBuffer credentialInput = new DataInputBuffer();
    credentialInput.reset(credentialBytes, credentialBytes.length);
    final Credentials credentials = new Credentials();
    try {
        credentials.readTokenStorageStream(credentialInput);
    } catch (IOException e) {
        throw new TezException(e);
    }
    final Token<JobTokenIdentifier> jobToken = TokenCache.getSessionToken(credentials);

    // TODO Unregistering does not happen at the moment, since there are no signals on when an app completes.
    LOG.info("Registering request with the ShuffleHandler for containerId {}", request.getContainerIdString());
    ShuffleHandler.get().registerApplication(applicationId, jobToken, user);

    final Configuration containerConf = new Configuration(getConfig());
    final ExecutionContextImpl executionContext = new ExecutionContextImpl(localAddress.get().getHostName());
    final ContainerRunnerCallable callable = new ContainerRunnerCallable(request, containerConf, executionContext, env, localDirs, workingDir, credentials, memoryPerExecutor);
    final ListenableFuture<ContainerExecutionResult> future = executorService.submit(callable);
    Futures.addCallback(future, new ContainerRunnerCallback(request, callable));
}
Also used : Path(org.apache.hadoop.fs.Path) TezException(org.apache.tez.dag.api.TezException) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) HashMap(java.util.HashMap) ExecutionContextImpl(org.apache.tez.runtime.api.impl.ExecutionContextImpl) JobTokenIdentifier(org.apache.tez.common.security.JobTokenIdentifier) IOException(java.io.IOException) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) Credentials(org.apache.hadoop.security.Credentials) ContainerExecutionResult(org.apache.tez.runtime.task.TezChild.ContainerExecutionResult)

Example 90 with DataInputBuffer

use of org.apache.hadoop.io.DataInputBuffer in project tez by apache.

the class ValuesIterator method readNextKey.

/**
 * Advances the input by one record and updates {@code more},
 * {@code nextKey} and {@code hasMoreValues} accordingly. The key that is
 * read may be the same as the current key.
 *
 * @throws IOException if advancing the input or deserializing the key fails
 */
private void readNextKey() throws IOException {
    more = in.next();
    if (!more) {
        // Input exhausted: nothing further for the current key.
        hasMoreValues = false;
        return;
    }

    DataInputBuffer rawKey = in.getKey();
    if (in.isSameKey()) {
        // The input reports an unchanged key, so no deserialization is done.
        hasMoreValues = in.isSameKey();
        return;
    }

    // Point keyIn at the unread portion of the raw key before deserializing.
    final int keyStart = rawKey.getPosition();
    keyIn.reset(rawKey.getData(), keyStart, rawKey.getLength() - keyStart);
    nextKey = keyDeserializer.deserialize(nextKey);

    // More values exist if this is the first key or the key compares equal.
    final boolean firstKey = (key == null);
    hasMoreValues = firstKey || comparator.compare(key, nextKey) == 0;
    if (firstKey || !hasMoreValues) {
        // Start of a new key group.
        if (inputKeyCounter != null) {
            inputKeyCounter.increment(1);
        }
        ++keyCtr;
    }
}
Also used : DataInputBuffer(org.apache.hadoop.io.DataInputBuffer)

Aggregations

DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 112, Test (org.junit.Test): 49, DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 45, IOException (java.io.IOException): 24, Text (org.apache.hadoop.io.Text): 20, Path (org.apache.hadoop.fs.Path): 16, Configuration (org.apache.hadoop.conf.Configuration): 13, IntWritable (org.apache.hadoop.io.IntWritable): 11, Random (java.util.Random): 10, DataInputStream (java.io.DataInputStream): 9, BufferedInputStream (java.io.BufferedInputStream): 8, HashMap (java.util.HashMap): 8, DataOutputStream (java.io.DataOutputStream): 6, LongWritable (org.apache.hadoop.io.LongWritable): 6, SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory): 6, IFile (org.apache.tez.runtime.library.common.sort.impl.IFile): 6, BufferedOutputStream (java.io.BufferedOutputStream): 5, BytesWritable (org.apache.hadoop.io.BytesWritable): 5, FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4, Credentials (org.apache.hadoop.security.Credentials): 4