use of org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader in project tez by apache.
the class TestValuesIterator method createInMemStreams.
/**
 * Create in-memory segments backed by InMemoryWriter buffers.
 *
 * @return list of in-memory segments
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public List<TezMerger.Segment> createInMemStreams() throws IOException {
  int numberOfStreams = Math.max(2, rnd.nextInt(10));
  LOG.info("No of streams : " + numberOfStreams);
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(keyClass);
  Serializer valueSerializer = serializationFactory.getSerializer(valClass);
  LocalDirAllocator localDirAllocator =
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext context = createTezInputContext();
  MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator,
      context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1);
  DataOutputBuffer keyBuf = new DataOutputBuffer();
  DataOutputBuffer valBuf = new DataOutputBuffer();
  DataInputBuffer keyIn = new DataInputBuffer();
  DataInputBuffer valIn = new DataInputBuffer();
  keySerializer.open(keyBuf);
  valueSerializer.open(valBuf);
  List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
  for (int i = 0; i < numberOfStreams; i++) {
    BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
    InMemoryWriter writer = new InMemoryWriter(bout);
    Map<Writable, Writable> data = createData();
    // write data
    for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
      keySerializer.serialize(entry.getKey());
      valueSerializer.serialize(entry.getValue());
      keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
      valIn.reset(valBuf.getData(), 0, valBuf.getLength());
      writer.append(keyIn, valIn);
      originalData.put(entry.getKey(), entry.getValue());
      keyBuf.reset();
      valBuf.reset();
      keyIn.reset();
      valIn.reset();
    }
    IFile.Reader reader = new InMemoryReader(mergeManager, null,
        bout.getBuffer(), 0, bout.getBuffer().length);
    segments.add(new TezMerger.Segment(reader, null));
    data.clear();
    writer.close();
  }
  return segments;
}
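The inner loop above follows Hadoop's standard serialize-then-reset pattern: each Writable is serialized into a reusable DataOutputBuffer, the resulting bytes are wrapped (not copied) into a DataInputBuffer, and that buffer is what InMemoryWriter.append() consumes. A minimal, self-contained sketch of the same round trip, using only stock Hadoop serialization APIs (standalone example, not part of the tez test; the Text values are arbitrary):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class SerializeResetExample {
  public static void main(String[] args) throws IOException {
    SerializationFactory factory = new SerializationFactory(new Configuration());
    Serializer<Text> serializer = factory.getSerializer(Text.class);

    DataOutputBuffer outBuf = new DataOutputBuffer();
    serializer.open(outBuf);

    DataInputBuffer inBuf = new DataInputBuffer();
    for (String s : new String[] { "a", "bb", "ccc" }) {
      serializer.serialize(new Text(s));
      // Point the input buffer at the serialized bytes without copying;
      // this is the buffer shape that InMemoryWriter.append() expects.
      inBuf.reset(outBuf.getData(), 0, outBuf.getLength());
      System.out.println(s + " -> " + outBuf.getLength() + " serialized bytes");
      // Rewind the output buffer so the next record starts at offset 0.
      outBuf.reset();
    }
    serializer.close();
  }
}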
use of org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader in project tez by apache.
the class TestTezMerger method createInMemorySegments.
private List<TezMerger.Segment> createInMemorySegments(int segmentCount, int keysPerSegment)
    throws IOException {
  List<TezMerger.Segment> segmentList = Lists.newLinkedList();
  Random rnd = new Random();
  DataInputBuffer key = new DataInputBuffer();
  DataInputBuffer value = new DataInputBuffer();
  for (int i = 0; i < segmentCount; i++) {
    BoundedByteArrayOutputStream stream = new BoundedByteArrayOutputStream(10000);
    InMemoryWriter writer = new InMemoryWriter(stream);
    for (int j = 0; j < keysPerSegment; j++) {
      populateData(new IntWritable(rnd.nextInt()), new LongWritable(rnd.nextLong()), key, value);
      writer.append(key, value);
    }
    writer.close();
    InMemoryReader reader = new InMemoryReader(merger, null, stream.getBuffer(), 0, stream.getLimit());
    segmentList.add(new TezMerger.Segment(reader, null));
  }
  return segmentList;
}
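populateData(...) is a private helper defined elsewhere in TestTezMerger; the snippet relies on it to fill the two reusable DataInputBuffers before each append(). A sketch of what such a helper plausibly does (hypothetical reconstruction, not the tez source):

// Hypothetical populateData-style helper: serialize the two Writables and
// point the reusable DataInputBuffers at the raw bytes, ready for
// InMemoryWriter.append(key, value).
private void populateData(IntWritable intKey, LongWritable longVal,
    DataInputBuffer key, DataInputBuffer value) throws IOException {
  DataOutputBuffer k = new DataOutputBuffer();
  DataOutputBuffer v = new DataOutputBuffer();
  intKey.write(k);
  longVal.write(v);
  key.reset(k.getData(), 0, k.getLength());
  value.reset(v.getData(), 0, v.getLength());
}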
use of org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader in project tez by apache.
the class TestIFile method testConcatenatedZlibPadding.
// Test concatenated zlib input - as in multiple map outputs during shuffle.
// This specific input is valid, but the decompressor can leave lingering
// bytes between segments. If the lingering bytes aren't handled correctly,
// the stream will get out of sync.
@Test(timeout = 5000)
public void testConcatenatedZlibPadding() throws IOException, URISyntaxException {
  byte[] bytes;
  long compTotal = 0;
  // Known raw and compressed lengths of the input
  long[] raws = { 2392, 102314, 42576, 31432, 25090 };
  long[] compressed = { 723, 25396, 10926, 8203, 6665 };
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(new Configuration());
  codec = codecFactory.getCodecByClassName("org.apache.hadoop.io.compress.DefaultCodec");
  URL url = getClass().getClassLoader().getResource("TestIFile_concatenated_compressed.bin");
  assertNotEquals("IFile input file must exist", null, url);
  Path p = new Path(url.toURI());
  FSDataInputStream inStream = localFs.open(p);
  for (int i = 0; i < 5; i++) {
    bytes = new byte[(int) raws[i]];
    assertEquals("Compressed stream out-of-sync", inStream.getPos(), compTotal);
    IFile.Reader.readToMemory(bytes, inStream, (int) compressed[i], codec, false, -1);
    compTotal += compressed[i];
    // Now read the data
    InMemoryReader inMemReader = new InMemoryReader(null, new InputAttemptIdentifier(0, 0),
        bytes, 0, bytes.length);
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    Deserializer<Text> keyDeserializer;
    Deserializer<IntWritable> valDeserializer;
    SerializationFactory serializationFactory = new SerializationFactory(defaultConf);
    keyDeserializer = serializationFactory.getDeserializer(Text.class);
    valDeserializer = serializationFactory.getDeserializer(IntWritable.class);
    keyDeserializer.open(keyIn);
    valDeserializer.open(valIn);
    while (inMemReader.nextRawKey(keyIn)) {
      inMemReader.nextRawValue(valIn);
    }
  }
  inStream.close();
}
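Note that the while loop only advances through the raw records to verify stream alignment; the opened deserializers are never actually invoked. If the test were extended to materialize each record, the conventional Hadoop pattern would look like this (a sketch, assuming the Text/IntWritable types the deserializers above were built for):

// Sketch: materialize each record instead of only skipping over raw bytes.
// Because keyDeserializer/valDeserializer were open()ed on keyIn/valIn,
// deserialize() reads whatever nextRawKey()/nextRawValue() just loaded.
Text key = new Text();
IntWritable val = new IntWritable();
while (inMemReader.nextRawKey(keyIn)) {
  inMemReader.nextRawValue(valIn);
  key = keyDeserializer.deserialize(key);
  val = valDeserializer.deserialize(val);
  // key and val now hold one decoded record
}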
use of org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader in project tez by apache.
the class TestIFile method readUsingInMemoryReader.
private void readUsingInMemoryReader(byte[] bytes, List<KVPair> originalData) throws IOException {
  InMemoryReader inMemReader = new InMemoryReader(null, new InputAttemptIdentifier(0, 0),
      bytes, 0, bytes.length);
  verifyData(inMemReader, originalData);
}
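Taken together, the snippets show a complete write/read round trip over a byte array. A condensed sketch of that pattern, using only the constructors and methods that appear in the snippets above (the buffer size and record values are arbitrary; assumes a surrounding method that throws IOException):

// Round-trip sketch: InMemoryWriter -> byte[] -> InMemoryReader.
BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(4096);
InMemoryWriter writer = new InMemoryWriter(bout);

// Serialize one Text/IntWritable record into reusable input buffers.
DataOutputBuffer keyOut = new DataOutputBuffer();
DataOutputBuffer valOut = new DataOutputBuffer();
new Text("k1").write(keyOut);
new IntWritable(42).write(valOut);
DataInputBuffer keyIn = new DataInputBuffer();
DataInputBuffer valIn = new DataInputBuffer();
keyIn.reset(keyOut.getData(), 0, keyOut.getLength());
valIn.reset(valOut.getData(), 0, valOut.getLength());

writer.append(keyIn, valIn);
// Close the writer before handing the buffer to the reader, as the
// TestTezMerger snippet does.
writer.close();

InMemoryReader reader = new InMemoryReader(null, new InputAttemptIdentifier(0, 0),
    bout.getBuffer(), 0, bout.getLimit());
DataInputBuffer k = new DataInputBuffer();
DataInputBuffer v = new DataInputBuffer();
while (reader.nextRawKey(k)) {
  reader.nextRawValue(v);
  // k and v now hold the raw serialized bytes of one record.
}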