Search in sources :

Example 1 with MergeManager

Use of org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager in project tez by apache.

The class TestValuesIterator, method createInMemStreams.

/**
 * Creates a random number of in-memory segments for the merger.
 *
 * <p>The number of segments is {@code Math.max(2, rnd.nextInt(10))}, so at least two are
 * always produced. For each segment the key/value pairs returned by {@code createData()}
 * are serialized and appended to an {@link InMemoryWriter} backed by a
 * {@link BoundedByteArrayOutputStream} of 1 MiB; every pair is also recorded in
 * {@code originalData}. The writer is closed before an {@link InMemoryReader} is created
 * over the same buffer, and both serializers are closed before the method returns.
 *
 * @return the list of segments, one per generated stream, in creation order
 * @throws IOException if serializing, appending or closing a writer/serializer fails
 */
@SuppressWarnings("unchecked")
public List<TezMerger.Segment> createInMemStreams() throws IOException {
    int numberOfStreams = Math.max(2, rnd.nextInt(10));
    LOG.info("No of streams : " + numberOfStreams);
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Serializer keySerializer = serializationFactory.getSerializer(keyClass);
    Serializer valueSerializer = serializationFactory.getSerializer(valClass);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext context = createTezInputContext();
    MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator, context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1);
    // Scratch buffers reused for every record: the serializers write into the output
    // buffers, and the input buffers are re-pointed at those bytes for the writer.
    DataOutputBuffer keyBuf = new DataOutputBuffer();
    DataOutputBuffer valBuf = new DataOutputBuffer();
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
    keySerializer.open(keyBuf);
    try {
        valueSerializer.open(valBuf);
        try {
            for (int i = 0; i < numberOfStreams; i++) {
                BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
                InMemoryWriter writer = new InMemoryWriter(bout);
                Map<Writable, Writable> data = createData();
                try {
                    // write data
                    for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
                        keySerializer.serialize(entry.getKey());
                        valueSerializer.serialize(entry.getValue());
                        keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
                        valIn.reset(valBuf.getData(), 0, valBuf.getLength());
                        writer.append(keyIn, valIn);
                        originalData.put(entry.getKey(), entry.getValue());
                        // Clear the scratch buffers so the next record starts from offset 0.
                        keyBuf.reset();
                        valBuf.reset();
                        keyIn.reset();
                        valIn.reset();
                    }
                } finally {
                    // Close even on failure, and always before a reader is created over the
                    // buffer, so any trailing bytes the writer emits on close are in place.
                    writer.close();
                }
                IFile.Reader reader = new InMemoryReader(mergeManager, null, bout.getBuffer(), 0, bout.getBuffer().length);
                segments.add(new TezMerger.Segment(reader, null));
                data.clear();
            }
        } finally {
            valueSerializer.close();
        }
    } finally {
        keySerializer.close();
    }
    return segments;
}
Also used : InMemoryReader(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader) IFile(org.apache.tez.runtime.library.common.sort.impl.IFile) InputContext(org.apache.tez.runtime.api.InputContext) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) MergeManager(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager) LinkedList(java.util.LinkedList) TezMerger(org.apache.tez.runtime.library.common.sort.impl.TezMerger) InMemoryWriter(org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) BoundedByteArrayOutputStream(org.apache.hadoop.io.BoundedByteArrayOutputStream) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) Map(java.util.Map) TreeMap(java.util.TreeMap) Serializer(org.apache.hadoop.io.serializer.Serializer)

Aggregations

LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 TreeMap (java.util.TreeMap)1 LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator)1 BoundedByteArrayOutputStream (org.apache.hadoop.io.BoundedByteArrayOutputStream)1 BytesWritable (org.apache.hadoop.io.BytesWritable)1 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)1 DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)1 IntWritable (org.apache.hadoop.io.IntWritable)1 LongWritable (org.apache.hadoop.io.LongWritable)1 Writable (org.apache.hadoop.io.Writable)1 SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory)1 Serializer (org.apache.hadoop.io.serializer.Serializer)1 InputContext (org.apache.tez.runtime.api.InputContext)1 InMemoryReader (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader)1 InMemoryWriter (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter)1 MergeManager (org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager)1 IFile (org.apache.tez.runtime.library.common.sort.impl.IFile)1 TezMerger (org.apache.tez.runtime.library.common.sort.impl.TezMerger)1