
Example 66 with DataOutputBuffer

use of org.apache.hadoop.io.DataOutputBuffer in project samza by apache.

the class YarnContainerRunner method startContainer.

/**
 * Runs a command as a process on the container. All binaries needed by the physical process are packaged at the
 * location specified by packagePath.
 */
private void startContainer(Path packagePath, Container container, Map<String, String> env, final String cmd) throws SamzaContainerLaunchException {
    log.info("starting container {} {} {} {}", new Object[] { packagePath, container, env, cmd });
    // TODO: SAMZA-1144 remove the customized approach for package resource and use the common one.
    // But keep it now for backward compatibility.
    // set the local package so that the containers and app master are provisioned with it
    LocalResource packageResource = Records.newRecord(LocalResource.class);
    URL packageUrl = ConverterUtils.getYarnUrlFromPath(packagePath);
    FileStatus fileStatus;
    try {
        fileStatus = packagePath.getFileSystem(yarnConfiguration).getFileStatus(packagePath);
    } catch (IOException ioe) {
        log.error("IO Exception when accessing the package status from the filesystem", ioe);
        throw new SamzaContainerLaunchException("IO Exception when accessing the package status from the filesystem", ioe);
    }
    packageResource.setResource(packageUrl);
    log.info("set package Resource in YarnContainerRunner for {}", packageUrl);
    packageResource.setSize(fileStatus.getLen());
    packageResource.setTimestamp(fileStatus.getModificationTime());
    packageResource.setType(LocalResourceType.ARCHIVE);
    packageResource.setVisibility(LocalResourceVisibility.APPLICATION);
    ByteBuffer allTokens;
    // copy tokens (copied from dist shell example)
    try {
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        // now remove the AM->RM token so that containers cannot access it
        Iterator<Token<? extends TokenIdentifier>> iter = credentials.getAllTokens().iterator();
        while (iter.hasNext()) {
            TokenIdentifier token = iter.next().decodeIdentifier();
            if (token != null && token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
                iter.remove();
            }
        }
        allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    } catch (IOException ioe) {
        log.error("IOException when writing credentials.", ioe);
        throw new SamzaContainerLaunchException("IO Exception when writing credentials to output buffer", ioe);
    }
    Map<String, LocalResource> localResourceMap = new HashMap<>();
    localResourceMap.put("__package", packageResource);
    // include the resources from the universal resource configurations
    LocalizerResourceMapper resourceMapper = new LocalizerResourceMapper(new LocalizerResourceConfig(config), yarnConfiguration);
    localResourceMap.putAll(resourceMapper.getResourceMap());
    ContainerLaunchContext context = Records.newRecord(ContainerLaunchContext.class);
    context.setEnvironment(env);
    context.setTokens(allTokens.duplicate());
    context.setCommands(Collections.singletonList(cmd));
    context.setLocalResources(localResourceMap);
    log.debug("setting localResourceMap to {}", localResourceMap);
    log.debug("setting context to {}", context);
    // note: this StartContainerRequest is built but never used; nmClient.startContainer() is called with the context directly
    StartContainerRequest startContainerRequest = Records.newRecord(StartContainerRequest.class);
    startContainerRequest.setContainerLaunchContext(context);
    try {
        nmClient.startContainer(container, context);
    } catch (YarnException ye) {
        log.error("Received YarnException when starting container: " + container.getId(), ye);
        throw new SamzaContainerLaunchException("Received YarnException when starting container: " + container.getId(), ye);
    } catch (IOException ioe) {
        log.error("Received IOException when starting container: " + container.getId(), ioe);
        throw new SamzaContainerLaunchException("Received IOException when starting container: " + container.getId(), ioe);
    }
}
Also used : AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) TokenIdentifier(org.apache.hadoop.security.token.TokenIdentifier) FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) Token(org.apache.hadoop.security.token.Token) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) URL(org.apache.hadoop.yarn.api.records.URL) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) SamzaContainerLaunchException(org.apache.samza.clustermanager.SamzaContainerLaunchException) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Iterator(java.util.Iterator) Credentials(org.apache.hadoop.security.Credentials)
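
Reading these tokens back on the container side uses the mirror-image DataInputBuffer. Below is a minimal round-trip sketch, assuming only the Hadoop classes already shown above; the class and method names are illustrative, not part of Samza:

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;

public final class TokenRoundTrip {

    // Serialize credentials the same way startContainer() does, then read them
    // back the way a launched container would.
    public static Credentials roundTrip(Credentials creds) throws IOException {
        DataOutputBuffer dob = new DataOutputBuffer();
        creds.writeTokenStorageToStream(dob);
        // wrap only the valid region of the backing array
        ByteBuffer wire = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

        // receiving side: copy the bytes out and deserialize
        byte[] bytes = new byte[wire.remaining()];
        wire.get(bytes);
        DataInputBuffer dib = new DataInputBuffer();
        dib.reset(bytes, bytes.length);
        Credentials restored = new Credentials();
        restored.readTokenStorageStream(dib);
        return restored;
    }
}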

Example 67 with DataOutputBuffer

use of org.apache.hadoop.io.DataOutputBuffer in project brisk by riptano.

the class ThriftUtils method toThrift.

public static ThriftDelegationToken toThrift(Token<? extends AbstractDelegationTokenIdentifier> delegationToken, InetSocketAddress address) throws java.io.IOException {
    String serviceAddress = InetAddress.getByName(address.getHostName()).getHostAddress() + ":" + address.getPort();
    delegationToken.setService(new Text(serviceAddress));
    DataOutputBuffer out = new DataOutputBuffer();
    Credentials ts = new Credentials();
    ts.addToken(new Text(serviceAddress), delegationToken);
    ts.writeTokenStorageToStream(out);
    byte[] tokenData = new byte[out.getLength()];
    System.arraycopy(out.getData(), 0, tokenData, 0, tokenData.length);
    return new ThriftDelegationToken(ByteBuffer.wrap(tokenData));
}
Also used : DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Text(org.apache.hadoop.io.Text) Credentials(org.apache.hadoop.security.Credentials)
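
The receiving side can reverse this with the matching token-storage read. A sketch under that assumption; ThriftTokenReader and readDelegationToken are hypothetical names, not part of brisk:

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;

public final class ThriftTokenReader {

    // Hypothetical reverse of toThrift(): recover the delegation token from the
    // serialized token storage, keyed by the service address it was stored under.
    public static Token<?> readDelegationToken(byte[] tokenData, String serviceAddress) throws IOException {
        Credentials ts = new Credentials();
        ts.readTokenStorageStream(new DataInputStream(new ByteArrayInputStream(tokenData)));
        return ts.getToken(new Text(serviceAddress));
    }
}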

Example 68 with DataOutputBuffer

use of org.apache.hadoop.io.DataOutputBuffer in project ignite by apache.

the class IgniteYarnUtils method createTokenBuffer.

/**
 * Creates a ByteBuffer with serialized {@link Credentials}.
 *
 * @param creds The credentials.
 * @return The ByteBuffer with the credentials.
 * @throws IOException If the credentials cannot be written to the buffer.
 */
public static ByteBuffer createTokenBuffer(Credentials creds) throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    creds.writeTokenStorageToStream(dob);
    return ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
}
Also used : DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer)
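
A typical caller hands the resulting buffer to a YARN launch context, much as Example 66 does. A minimal usage sketch; the wrapper class is illustrative, and the import for IgniteYarnUtils is omitted:

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.util.Records;

public final class LaunchContextTokens {

    // Attach the current user's serialized tokens to a fresh launch context,
    // using createTokenBuffer() from the IgniteYarnUtils example above.
    public static ContainerLaunchContext withCurrentUserTokens() throws IOException {
        Credentials creds = UserGroupInformation.getCurrentUser().getCredentials();
        ByteBuffer tokens = IgniteYarnUtils.createTokenBuffer(creds);
        ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
        // duplicate() so the context gets its own read position over the same bytes
        ctx.setTokens(tokens.duplicate());
        return ctx;
    }
}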

Example 69 with DataOutputBuffer

use of org.apache.hadoop.io.DataOutputBuffer in project hive by apache.

the class VectorMapJoinBaseOperator method reProcessBigTable.

/**
 * Rebuilds vectorized row batches from the big-table rows fed back by the parent MapJoinOperator.
 */
@Override
protected void reProcessBigTable(int partitionId) throws HiveException {
    if (scratchBatch == null) {
        // The process method was not called -- no big table rows.
        return;
    }
    HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
    ObjectContainer bigTable = partition.getMatchfileObjContainer();
    DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
    while (bigTable.hasNext()) {
        Object row = bigTable.next();
        VectorizedBatchUtil.addProjectedRowToBatchFrom(row, (StructObjectInspector) inputObjInspectors[posBigTable], scratchBatch.size, scratchBatch, dataOutputBuffer);
        scratchBatch.size++;
        if (scratchBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
            // call process once we have a full batch
            process(scratchBatch, tag);
            scratchBatch.reset();
            dataOutputBuffer.reset();
        }
    }
    // Process the final partial batch, which has fewer than DEFAULT_SIZE rows
    if (scratchBatch.size > 0) {
        process(scratchBatch, tag);
        scratchBatch.reset();
        dataOutputBuffer.reset();
    }
    bigTable.clear();
}
Also used : DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) ObjectContainer(org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer) HybridHashTableContainer(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)
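
The reset() calls above are what make the scratch buffer cheap to reuse: reset() rewinds the write position to zero while keeping the backing byte array, so nothing is reallocated between batches. A small standalone sketch of that reuse pattern, independent of Hive:

import java.io.IOException;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public final class ScratchBufferDemo {

    public static void main(String[] args) throws IOException {
        DataOutputBuffer scratch = new DataOutputBuffer();
        for (int batch = 0; batch < 3; batch++) {
            // any Writable can serialize itself into the shared scratch buffer
            new Text("batch-" + batch).write(scratch);
            System.out.println("bytes in batch " + batch + ": " + scratch.getLength());
            // rewind the write position; the backing array is retained, not reallocated
            scratch.reset();
        }
    }
}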

Example 70 with DataOutputBuffer

use of org.apache.hadoop.io.DataOutputBuffer in project hive by apache.

the class DagUtils method createEdgeProperty.

/*
   * Helper function to create an edge property from an edge type.
   */
private EdgeProperty createEdgeProperty(Vertex w, TezEdgeProperty edgeProp, Configuration conf, BaseWork work, TezWork tezWork) throws IOException {
    MRHelpers.translateMRConfToTez(conf);
    String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
    String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
    String partitionerClassName = conf.get("mapred.partitioner.class");
    Map<String, String> partitionerConf;
    EdgeType edgeType = edgeProp.getEdgeType();
    switch(edgeType) {
        case BROADCAST_EDGE:
            UnorderedKVEdgeConfig et1Conf = UnorderedKVEdgeConfig.newBuilder(keyClass, valClass).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
            return et1Conf.createDefaultBroadcastEdgeProperty();
        case CUSTOM_EDGE:
            assert partitionerClassName != null;
            partitionerConf = createPartitionerConf(partitionerClassName, conf);
            UnorderedPartitionedKVEdgeConfig et2Conf = UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
            EdgeManagerPluginDescriptor edgeDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
            CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration(edgeProp.getNumBuckets(), null);
            DataOutputBuffer dob = new DataOutputBuffer();
            edgeConf.write(dob);
            // wrap only the bytes actually written: getData() returns the full backing
            // array, which can be longer than getLength()
            edgeDesc.setUserPayload(UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength())));
            return et2Conf.createDefaultCustomEdgeProperty(edgeDesc);
        case CUSTOM_SIMPLE_EDGE:
            assert partitionerClassName != null;
            partitionerConf = createPartitionerConf(partitionerClassName, conf);
            UnorderedPartitionedKVEdgeConfig et3Conf = UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
            return et3Conf.createDefaultEdgeProperty();
        case ONE_TO_ONE_EDGE:
            UnorderedKVEdgeConfig et4Conf = UnorderedKVEdgeConfig.newBuilder(keyClass, valClass).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
            return et4Conf.createDefaultOneToOneEdgeProperty();
        case XPROD_EDGE:
            EdgeManagerPluginDescriptor edgeManagerDescriptor = EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
            List<String> crossProductSources = new ArrayList<>();
            for (BaseWork parentWork : tezWork.getParents(work)) {
                if (EdgeType.XPROD_EDGE == tezWork.getEdgeType(parentWork, work)) {
                    crossProductSources.add(parentWork.getName());
                }
            }
            CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources);
            edgeManagerDescriptor.setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf)));
            UnorderedPartitionedKVEdgeConfig cpEdgeConf = UnorderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, ValueHashPartitioner.class.getName()).build();
            return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor);
        case SIMPLE_EDGE:
        // fallthrough
        default:
            assert partitionerClassName != null;
            partitionerConf = createPartitionerConf(partitionerClassName, conf);
            OrderedPartitionedKVEdgeConfig et5Conf = OrderedPartitionedKVEdgeConfig.newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf).setFromConfiguration(conf).setKeySerializationClass(TezBytesWritableSerialization.class.getName(), TezBytesComparator.class.getName(), null).setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null).build();
            return et5Conf.createDefaultEdgeProperty();
    }
}
Also used : OrderedPartitionedKVEdgeConfig(org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig) ArrayList(java.util.ArrayList) MRPartitioner(org.apache.tez.mapreduce.partition.MRPartitioner) EdgeType(org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType) TezBytesComparator(org.apache.tez.runtime.library.common.comparator.TezBytesComparator) UnorderedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig) EdgeManagerPluginDescriptor(org.apache.tez.dag.api.EdgeManagerPluginDescriptor) CartesianProductEdgeManager(org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) UnorderedPartitionedKVEdgeConfig(org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig) TezBytesWritableSerialization(org.apache.tez.runtime.library.common.serializer.TezBytesWritableSerialization) CartesianProductConfig(org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) TezConfiguration(org.apache.tez.dag.api.TezConfiguration)
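
The write-then-wrap sequence in the CUSTOM_EDGE branch is the general recipe for shipping any Writable as an opaque payload; the one detail to watch is bounding the wrap by getLength(), since getData() exposes the whole backing array. A minimal sketch of that pattern (WritablePayloads is an illustrative helper, not part of Hive or Tez):

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;

public final class WritablePayloads {

    // Serialize any Writable into a ByteBuffer bounded by the bytes actually written.
    public static ByteBuffer serialize(Writable w) throws IOException {
        DataOutputBuffer dob = new DataOutputBuffer();
        w.write(dob);
        // bound by getLength(): getData() may be larger than the written content
        return ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    }
}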

Aggregations

DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 132
Test (org.junit.Test): 48
Credentials (org.apache.hadoop.security.Credentials): 37
ByteBuffer (java.nio.ByteBuffer): 36
DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 36
IOException (java.io.IOException): 34
Configuration (org.apache.hadoop.conf.Configuration): 25
Token (org.apache.hadoop.security.token.Token): 25
Path (org.apache.hadoop.fs.Path): 21
HashMap (java.util.HashMap): 20
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 20
ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext): 18
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 16
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 16
Random (java.util.Random): 15
DataInputStream (java.io.DataInputStream): 14
Text (org.apache.hadoop.io.Text): 14
ArrayList (java.util.ArrayList): 13
Map (java.util.Map): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 10